In [1]:
# Imports
from config.config import Config
from llm_models.llm_models import llm, embeddings_model
from vectordb.chunking import DocumentChunker
from vectordb.vector_ops import PineconeVectorDB
from prompts.prompts import QuizGenerationPrompts


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Initialize Chunker
chunker = DocumentChunker()
print(f"Chunk size: {chunker.chunk_size}, Overlap: {chunker.chunk_overlap}")


Chunk size: 800, Overlap: 100


In [3]:
# Test Chunker with file
chunks = chunker.process_file("videx.pdf")
print(f"Chunks: {len(chunks)}")


Chunks: 17


In [4]:
# texts = [chunk.page_content for chunk in chunks]



In [5]:
# Create Embedding
text = "Machine learning is a subset of AI."
embedding = embeddings_model.embed_query(text)
print(f"Dimension: {len(embedding)}")


Dimension: 3072


In [16]:
# Initialize Pinecone
vector_db = PineconeVectorDB()
print(f"Index: {vector_db.index_name}")


Index: quiz-generator


In [17]:
# Add chunks to Pinecone
vector_db.add_documents(chunks, namespace="test")


['d8a7c22f-a425-4c20-a979-66474f5592c7',
 '5810e477-b391-4aed-b7be-2c5d7fab6b05',
 'c5e3a710-3905-48b9-b2f7-59a897b83ad4',
 '3a254d54-f89a-4d8b-bfba-77796a403da7',
 '48f1548d-e92c-4f12-bb6c-7d4ce284cc7d',
 '1f8f1387-9155-46b3-829d-27208c74020b',
 '55a08bab-a634-4d8f-8dfa-a82e52074e46',
 '3861a08e-f336-4d01-a598-195450db34b1',
 'b37693da-1f50-43f7-b709-f8191a53780b',
 '70ef17bb-b34c-44a2-9d46-bb90668be5a2',
 'a92092ca-8e94-4f50-8ab1-d4c954c2b42d',
 '9fee723c-edcb-4c2b-ac38-884e90ca65fe',
 'c27b5882-0bd3-41a1-9dcb-78143faa6852',
 '15c2a27d-17bf-4030-ae33-8da4df725ebd',
 '708a776c-10df-4297-bb5f-ca3b19ea4558',
 '3d5b15f7-7bf4-449f-831d-bba74c318404',
 'b10d22fa-4c0f-4bd9-b2e4-0a36b8ad70be']

In [None]:
# Similarity Search
results = vector_db.similarity_search("what is the main topic?", k=3, namespace="test")
for r in results:
    print(r.page_content[:100])


In [9]:
# Initialize LLM
print(f"Model: {llm.model_name}, Temp: {llm.temperature}")


Model: gpt-4o-mini, Temp: 0.3


In [12]:
# Test LLM
response = llm.invoke("hi")
print(response.content)


Hello! How can I assist you today?


In [19]:
# Test Quiz Generation with Prompt
content = "\n".join([r.page_content for r in results])
prompt = f"Generate 1 MCQ from:\n{content}\nReturn JSON."
messages = [
    {"role": "system", "content": QuizGenerationPrompts.SYSTEM_PROMPT},
    {"role": "user", "content": prompt}
]
response = llm.invoke(messages)
print(response.content)


```json
{
  "question": {
    "type": "multiple_choice",
    "question_text": "What is the primary purpose of the stay in the Federal Republic of Germany as indicated in the document?",
    "options": [
      {
        "option_text": "Employment",
        "is_correct": false
      },
      {
        "option_text": "Tourism",
        "is_correct": false
      },
      {
        "option_text": "Study",
        "is_correct": true
      },
      {
        "option_text": "Family reunion",
        "is_correct": false
      }
    ],
    "difficulty": "easy"
  }
}
```


In [14]:
# Delete Index
vector_db.delete_index()
print("Deleted")


Deleted
