In [1]:
import sys
import os
# Make the project root importable so that `src.*` modules resolve.
# The root is taken to be the parent of the current working directory, i.e. the
# folder above notebooks/ when the kernel is started from there
# (e.g. .../ai-research-assistant/notebooks -> .../ai-research-assistant).
notebook_parent_dir = os.path.abspath('..')
# Guard against duplicates: re-running this cell must not keep growing sys.path.
if notebook_parent_dir not in sys.path:
    sys.path.append(notebook_parent_dir)

In [2]:
from src.utils.config import Config
from src.document_loader import DocumentLoader, Document
from src.vector_store import VectorStore
from groq import Groq

# Reaching this line means every import above resolved.
print("✅ Imports successful!")

EMBEDDING_PROVIDER: local
EMBEDDING_MODEL: multi-qa-distilbert-cos-v1
VECTOR_SIZE: 768
✅ Imports successful!


In [3]:
# Build a minimal Document by hand and dump its attributes to see how the
# content and metadata are stored on the instance.
intro_text = "Python is a high-level programming language. It's great for beginners and experts alike."
intro_metadata = {"source_type": "test", "title": "Python Intro"}
document = Document(content=intro_text, metadata=intro_metadata)
print(vars(document))

{'content': "Python is a high-level programming language. It's great for beginners and experts alike.", 'metadata': {'source_type': 'test', 'title': 'Python Intro'}}


In [6]:
# Collect documents from three source types (PDF, web page, YouTube transcript)
# into a single list, `docs`, that later cells inspect and index.
project_root = Config.PROJECT_ROOT
loader = DocumentLoader()
docs = []  # rebuilt from scratch on every run, so the cell is safe to re-run

# Load PDF
MAX_PDF_DOCS = 3  # keep only the first 3 entries (pages, per the original note) for speed
pdf_path = os.path.join(project_root, "data", "IBM SkillsBuild_AI Experiential Learning Lab_2025_Guide.pdf")
if os.path.exists(pdf_path):
    print(f"   Loading PDF: {pdf_path}")
    pdf_docs = loader.load_pdf(pdf_path)[:MAX_PDF_DOCS]
    docs.extend(pdf_docs)
else:
    # Make the skip visible instead of silently continuing without the PDF.
    print(f"   PDF not found, skipping: {pdf_path}")

# Load Web
web_url = "https://en.wikipedia.org/wiki/Artificial_intelligence"
print(f"   Loading Web: {web_url}")
docs.append(loader.load_web_page(web_url))

# Load YouTube
yt_url = "https://www.youtube.com/watch?v=aircAruvnKk"
print(f"   Loading YouTube: {yt_url}")
docs.append(loader.load_youtube_transcript(yt_url))

   Loading PDF: D:\Learning\personal-research-assistant-with-multi-source-rag\ai-research-assistant\data\IBM SkillsBuild_AI Experiential Learning Lab_2025_Guide.pdf
   Loading Web: https://en.wikipedia.org/wiki/Artificial_intelligence
   Loading YouTube: https://www.youtube.com/watch?v=aircAruvnKk


In [7]:
# Display the list of loaded documents (the cell's last expression is shown as its repr).
docs

[Document(source=pdf, length=114),
 Document(source=pdf, length=1398),
 Document(source=pdf, length=6128),
 Document(source=web, length=216950),
 Document(source=youtube, length=18430)]

In [8]:
# Create the vector store that later cells index into and search.
# NOTE(review): in_memory=True presumably means nothing is persisted between
# kernel sessions — confirm against VectorStore.
vector_store = VectorStore(in_memory=True)
# Dump the instance attributes to inspect what the store was configured with.
vars(vector_store)

Initializing collection: research_documents
[OK] Created collection: research_documents


{'collection_name': 'research_documents',
 'qdrant_client': <qdrant_client.qdrant_client.QdrantClient at 0x2fdda1a3f10>,
 'groq_client': <groq.Groq at 0x2fdda1543d0>,
 'openai_client': None,
 'local_model': SentenceTransformer(
   (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'DistilBertModel'})
   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
   (2): Normalize()
 )}

In [9]:
# Groq client used for the chat-completion call in the RAG step; the API key
# is read from Config rather than being hardcoded in the notebook.
groq_client = Groq(api_key=Config.GROQ_API_KEY)
print("✅ Components initialized!")

✅ Components initialized!


In [38]:
# Small hand-written sample text about RAG, to be wrapped in a Document below.
# NOTE(review): `doc` is not passed to the vector store in the cells visible
# here (the indexing cell adds `docs`) — confirm whether this sample is still needed.
test_content = """
Retrieval-Augmented Generation (RAG) is a technique that combines information retrieval 
with text generation. It works by first retrieving relevant documents from a knowledge base, 
then using those documents as context for generating responses.

The main benefits of RAG include:
1. Access to up-to-date information
2. Reduced hallucinations
3. Ability to cite sources
4. Cost-effective compared to fine-tuning

RAG is particularly useful for question-answering systems, chatbots, and research assistants.
"""

# Wrap the sample text together with metadata marking it as a test source.
doc = Document(
    content=test_content,
    metadata={"source_type": "test", "title": "RAG Introduction"}
)

In [39]:
# Display the sample document (the cell's last expression is shown as its repr).
doc

Document(source=test, length=509)

In [10]:
# Index every loaded document in the vector store.
# NOTE(review): re-running this cell presumably indexes the same documents a
# second time — confirm how VectorStore.add_documents handles repeats.
vector_store.add_documents(docs)
print("✅ Document added to vector store!")

  0%|          | 0/5 [00:00<?, ?it/s]

   [AI Research Assistant] Using sliding window chunking for document: False
   [AI Research Assistant] Using sliding window chunking for document: False
   [AI Research Assistant] Using sliding window chunking for document: False
   [AI Research Assistant] Using sliding window chunking for document: False
   [AI Research Assistant] Using sliding window chunking for document: False
[OK] Added 408 chunks from 5 documents
✅ Document added to vector store!


In [11]:
# Test query
question = input()
results = vector_store.search(question, top_k=2)

print(f"\nQuery: {question}\n")
for i, result in enumerate(results, 1):
    print(f"Result {i} (score: {result['score']:.3f}):")
    print(f"{result['text']}\n")
results

 What is Artificial intelligence



Query: What is Artificial intelligence

Result 1 (score: 0.643):
ed in 2022.
[
398
]
According to PitchBook research, 22% of newly funded
startups
in 2024 claimed to be AI companies.
[
399
]
Philosophy
Main article:
Philosophy of artificial intelligence
Philosophical debates have historically sought to determine the nature of intelligence and how to make intelligent machines.
[
400
]
Another major focus has been whether machines can be conscious, and the associated ethical implications.
[
401
]
Many other topics in philosophy are relevant to AI, such as
epistemology
and
free will
.
[
402
]
Rapid advancements have intensified public discussions on the philosophy and
ethics of AI
.
[
401
]
Defining artificial intelligence
See also:
Synthetic intelligence
,
Intelligent agent
,
Artificial mind
,
Virtual intelligence
, and
Dartmouth workshop
Alan Turing
wrot

Result 2 (score: 0.638):
 define the goal of their field as making 'machines that fly so exactly like
pigeons
that they can fool oth

[{'text': 'ed in 2022.\n[\n398\n]\nAccording to PitchBook research, 22% of newly funded\nstartups\nin 2024 claimed to be AI companies.\n[\n399\n]\nPhilosophy\nMain article:\nPhilosophy of artificial intelligence\nPhilosophical debates have historically sought to determine the nature of intelligence and how to make intelligent machines.\n[\n400\n]\nAnother major focus has been whether machines can be conscious, and the associated ethical implications.\n[\n401\n]\nMany other topics in philosophy are relevant to AI, such as\nepistemology\nand\nfree will\n.\n[\n402\n]\nRapid advancements have intensified public discussions on the philosophy and\nethics of AI\n.\n[\n401\n]\nDefining artificial intelligence\nSee also:\nSynthetic intelligence\n,\nIntelligent agent\n,\nArtificial mind\n,\nVirtual intelligence\n, and\nDartmouth workshop\nAlan Turing\nwrot',
  'score': 0.6429539412684954,
  'metadata': {'chunk_index': 144,
   'total_chunks': 362,
   'source_type': 'web',
   'source_url': 'https:

In [13]:
def rag_query(question: str, top_k: int = 3) -> str:
    """Answer a question with retrieval-augmented generation.

    Retrieves the ``top_k`` best-matching chunks from the notebook-level
    ``vector_store``, joins their text into one context block, and asks the
    notebook-level ``groq_client`` to answer from that context. The model
    name, token limit and temperature are read from ``Config``.

    Args:
        question: The user's question; used both as the search query and
            inside the prompt sent to the model.
        top_k: Number of chunks to retrieve as context. Defaults to 3.

    Returns:
        The text of the first completion choice, or an empty string when the
        response carries no text content.
    """
    # 1. Retrieve relevant context
    results = vector_store.search(question, top_k=top_k)
    context = "\n\n".join(r["text"] for r in results)

    # 2. Create prompt with context
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Answer questions based on the provided context. If the context doesn't contain the answer, say so."
        },
        {
            "role": "user",
            "content": f"Context:\n{context}\n\nQuestion: {question}"
        }
    ]

    # 3. Generate response
    response = groq_client.chat.completions.create(
        model=Config.GROQ_MODEL,
        messages=messages,
        max_tokens=Config.MAX_TOKENS,
        temperature=Config.TEMPERATURE
    )

    # Coerce a missing (None) content to "" so the declared str return type holds.
    return response.choices[0].message.content or ""

# Test it
# Keep the question in one variable so the call and the printed echo cannot drift apart.
demo_question = "What is Artificial intelligence and what are its benefits?"
answer = rag_query(demo_question)
print(f"Question: {demo_question}\n")
print(f"Answer: {answer}")

Question: What is Artificial intelligence and what are its benefits?

Answer: Artificial intelligence (AI) is a field that draws upon various disciplines such as formal logic, artificial neural networks, statistics, operations research, economics, psychology, linguistics, philosophy, and neuroscience to create intelligent machines that can complete cognitive tasks at least as well as humans.

The benefits of AI are numerous and diverse, including:

1. **Predictive analytics**: AI can analyze large amounts of data to make predictions and forecasts, such as in astronomy, where it is used to forecast solar activity and discover exoplanets.
2. **Automation**: AI can automate tasks, such as in greenhouses, where it can detect diseases and pests, and save water.
3. **Classification and regression**: AI can classify and regress data, such as in livestock pig call emotions, where it can classify emotions and improve animal welfare.
4. **Space exploration**: AI can analyze data from space missi

In [14]:
# Closing note: rough cost of running the notebook and where to check actual usage.
print("\n💰 Estimated cost for this notebook: ~$0.001")
print("Check your usage at: https://groq.com/usage")


💰 Estimated cost for this notebook: ~$0.001
Check your usage at: https://groq.com/usage
