In [1]:
import json
import torch
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sklearn.metrics.pairwise import cosine_similarity

In [28]:
# Sentence-embedding model; the same encoder is used for documents and questions.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [29]:
# Load the knowledge base: one JSON object per line, flattened to "title : text".
docs_file = "docs.jsonl"
documents = []
with open(docs_file, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        # Skip blank lines (extra newlines between or after records);
        # json.loads would raise JSONDecodeError on them.
        if not line:
            continue
        record = json.loads(line)
        # Missing fields fall back to placeholders rather than raising KeyError.
        title = record.get("title", "Untitled")
        text = record.get("text", "")
        documents.append(f"{title} : {text}")

In [30]:
# Display the loaded "title : text" strings as a quick sanity check.
documents

['OrionSearch : It supports field-level boosting and phrase queries. The query language supports proximity operators using `~k` syntax. OrionSearch serves as a full-text search engine with BM25 ranking.',
 'OrionSearch : Synonym expansion is configured in `orion.yml`. The indexer uses incremental segment merges.',
 'OrionSearch : OrionSearch community survey results will be published next quarter.',
 'OrionSearch : Users on forums discussed OrionSearch tutorials and workshop schedules.',
 'OrionSearch : OrionSearch release notes mention minor UI tweaks and logging improvements.',
 'ANLP : It provides sentence splitting, NER, and lemmatization for 50+ languages. Dictionaries are configured in `atlas.yml`. AtlasNLP has a rule-based fallback for rare tokens.',
 'ANLP : AtlasNLP is a multilingual text processing suite. Models are packaged in ONNX format for portability.',
 'Atlas NLP : Atlas NLP community survey results will be published next quarter.',
 'Nebula DB : It supports JSON docum

In [31]:
# Embed every document once up front. The encoder is asked for normalised
# vectors, returned as a tensor computed on the CPU.
doc_embeddings = embedder.encode(
    documents,
    device='cpu',
    normalize_embeddings=True,
    convert_to_tensor=True,
)

In [32]:
# Inspect the embedding vector of the first document.
doc_embeddings[0]

tensor([-9.6050e-03, -3.1252e-03,  9.2519e-03, -1.1618e-02, -4.1495e-02,
         3.2064e-02,  1.6411e-02,  1.2942e-03, -2.3323e-02,  4.2615e-03,
         1.3207e-02,  2.2988e-02,  5.1102e-02, -1.2245e-02,  4.6949e-02,
         6.2586e-02,  1.6964e-02,  6.2505e-02, -2.8696e-02, -8.7659e-02,
         7.4998e-03,  7.1737e-02,  3.4170e-02, -1.5578e-02,  2.9954e-03,
         1.2970e-02, -1.6670e-02, -5.1083e-05,  4.7281e-02,  7.1144e-03,
        -6.5227e-02,  2.8838e-02,  4.6683e-02,  7.9088e-02, -6.4427e-02,
         1.8344e-02, -6.3278e-02, -2.4537e-02, -6.3762e-02, -5.1528e-02,
        -7.1323e-02, -3.5413e-02,  3.0813e-02, -3.9654e-03,  3.0141e-02,
        -7.3448e-03, -1.0544e-01,  1.2112e-02,  9.1032e-03,  4.0452e-02,
        -1.6525e-01, -5.0537e-02, -3.2600e-02,  3.9372e-02,  3.7440e-02,
         1.0008e-01, -6.1578e-02, -6.1972e-02, -3.2912e-02, -9.1542e-02,
        -5.3954e-03, -4.1354e-02, -2.1714e-02, -3.8042e-03,  2.0406e-02,
        -4.2889e-02,  5.5261e-02,  1.8164e-02,  5.6

In [9]:

# Load the TinyLlama chat model and wrap it in a text-generation pipeline.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Half precision is only a safe default on GPU. With device_map="auto" the model
# may land on the CPU, where float16 can be unsupported or very slow, so fall
# back to float32 when no CUDA device is available.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map="auto"
)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

Device set to use cuda:0


In [33]:
# Read the evaluation questions from disk (a single JSON document).
questions_path = "questions.json"
with open(questions_path, mode="r", encoding="utf-8") as questions_file:
    questions = json.load(questions_file)

In [34]:
# Display the loaded questions as a quick sanity check.
questions

[{'id': 'q0',
  'question': 'What ranking function does OrionSearch use?',
  'answers': ['BM25', 'bm25']},
 {'id': 'q1',
  'question': 'State a key feature of OrionSearch.',
  'answers': ['supports', 'field-level', 'boosting', 'phrase', 'queries']},
 {'id': 'q2',
  'question': 'State a key feature of OrionSearch.',
  'answers': ['indexer', 'uses', 'incremental', 'segment', 'merges']},
 {'id': 'q3',
  'question': 'State a key feature of OrionSearch.',
  'answers': ['synonym', 'expansion', 'configured', 'orion.yml']},
 {'id': 'q4',
  'question': 'State a key feature of OrionSearch.',
  'answers': ['query',
   'language',
   'supports',
   'proximity',
   'operators',
   'using']},
 {'id': 'q5',
  'question': 'State a key feature of AtlasNLP.',
  'answers': ['atlasnlp', 'multilingual', 'text', 'processing', 'suite']},
 {'id': 'q6',
  'question': 'State a key feature of AtlasNLP.',
  'answers': ['provides',
   'sentence',
   'splitting',
   'lemmatization',
   'languages']},
 {'id': 'q7',


In [38]:
# Retrieval-augmented generation: for each question, retrieve the most similar
# documents, build a prompt from them, and generate a short answer.
TOP_K = 5             # number of retrieved documents placed in the prompt
MAX_NEW_TOKENS = 50   # generation budget per answer
SEED = 42             # seeds the sampling RNG so reruns are repeatable

# Sampling (do_sample=True) draws from torch's global RNG; seed it so that a
# Restart & Run All produces the same answers on the same setup.
torch.manual_seed(SEED)

# Loop-invariant: convert the document embeddings to NumPy once, not per question.
doc_matrix = doc_embeddings.cpu().numpy()

rag_answers = {}

for i, q in enumerate(questions):
    # Questions may be plain strings or dicts with "id" / "question" keys.
    # Prefer the record's own id so answers stay aligned with the question file
    # even if ids are not simply q0, q1, ... in list order.
    if isinstance(q, str):
        question_id, question_text = f"q{i}", q
    else:
        question_id = q.get("id", f"q{i}")
        question_text = q.get("question", "")

    query_embedding = embedder.encode(question_text, convert_to_tensor=True, normalize_embeddings=True, device='cpu')

    # Score the question against every document and keep the TOP_K best,
    # highest similarity first.
    cos_scores = cosine_similarity(
        query_embedding.cpu().numpy().reshape(1, -1),
        doc_matrix
    )[0]
    top_indices = np.argsort(cos_scores)[::-1][:TOP_K]
    top_docs = [documents[idx] for idx in top_indices]
    context = "\n".join(top_docs)

    print(f"\nQuestion {i+1}: {question_text}")
    print("Retrieved context:")
    for doc in top_docs:
        print("-", doc)

    prompt = f"""
Answer the following question using ONLY the context provided.
Give a short, clear answer.

Context:
{context}

Question:
{question_text}

Answer:
"""

    # return_full_text=False makes the pipeline return only the continuation.
    # Splitting the full text on "Answer:" would truncate the answer whenever
    # the model itself emits "Answer:" again.
    output = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    answer = output[0]["generated_text"].strip()

    rag_answers[question_id] = answer
    print("Generated answer:", answer)
    print("-"*60)




Question 1: What ranking function does OrionSearch use?
Retrieved context:
- OrionSearch : OrionSearch community survey results will be published next quarter.
- OrionSearch : It supports field-level boosting and phrase queries. The query language supports proximity operators using `~k` syntax. OrionSearch serves as a full-text search engine with BM25 ranking.
- OrionSearch : Users on forums discussed OrionSearch tutorials and workshop schedules.
- OrionSearch : OrionSearch release notes mention minor UI tweaks and logging improvements.
- OrionSearch : Synonym expansion is configured in `orion.yml`. The indexer uses incremental segment merges.
Generated answer: OrionSearch uses BM25 ranking, which is a hybrid algorithm that combines two popular ranking functions, Boosted Term Frequency (BM25) and Incremental Term Frequency (ITF).
------------------------------------------------------------

Question 2: State a key feature of OrionSearch.
Retrieved context:
- OrionSearch : Users on for

In [39]:

# Persist the generated answers as pretty-printed, non-ASCII-escaped JSON.
answers_path = "rag_answers.json"
with open(answers_path, mode="w", encoding="utf-8") as answers_file:
    json.dump(rag_answers, answers_file, ensure_ascii=False, indent=2)

print("✅ All answers saved to rag_answers.json")

✅ All answers saved to rag_answers.json
