## Step 1: Import Required Libraries


In [None]:
# only use this if you get an error about cython and on google colab environment
# 1. Install an older version of Cython
#%pip install "cython<3.0.0"

# 2. Install PyYAML 5.1 without build isolation so it uses the pinned Cython
#%pip install --no-build-isolation PyYAML==5.1

# 3. Install frontmatter (it will now see the correct PyYAML already installed)
#%pip install frontmatter


In [14]:
## URL Format : https://github.com/<owner>/<repository>/archive/refs/heads/<branch_name>.zip
import io
import zipfile
import requests
import frontmatter

## Step 2: Download the Repository
- GitHub's ZIP URL format:
https://codeload.github.com/{owner}/{repo}/zip/refs/heads/{branch}


In [15]:
url = 'https://codeload.github.com/fsamura01/task-manager-app/zip/refs/heads/main'
resp = requests.get(url)
resp

<Response [200]>

## Step 3: Process the ZIP File in Memory


In [18]:
repository_data = []

# Create a ZipFile object from the downloaded content
zf = zipfile.ZipFile(io.BytesIO(resp.content))
# Iterate over files in the zip
for file_info in zf.infolist():
    # Get the filename
    filename = file_info.filename.lower()

    # Only process markdown files
    if not filename.endswith('.md'):
        continue

    # Read and parse each file
    with zf.open(file_info) as f_in:
        content = f_in.read()
        post = frontmatter.loads(content)
        data = post.to_dict()
        data['filename'] = filename
        repository_data.append(data)

zf.close()

In [19]:
print(f"Total documents extracted: {len(repository_data)}")

Total documents extracted: 3


In [20]:
# Look at multiple documents to find one with frontmatter
for i, doc in enumerate(repository_data[:5]):
    print(f"\n--- Document {i} ---")
    print(f"Filename: {doc.get('filename')}")
    print(f"Keys: {list(doc.keys())}")
    if 'question' in doc:
        print(f"Question: {doc.get('question')}")
        break


--- Document 0 ---
Filename: task-manager-app-main/readme.md
Keys: ['content', 'filename']

--- Document 1 ---
Filename: task-manager-app-main/client/readme.md
Keys: ['content', 'filename']

--- Document 2 ---
Filename: task-manager-app-main/server/readme.md
Keys: ['content', 'filename']


In [21]:
# Find the document with the question
doc = repository_data[0]
print(doc)

{'content': '# Task Manager App\n\nA React-based task management application that allows users to create, edit, delete, and track tasks with due dates and completion status.\n\n## Features\n\n### Task Management\n- **Create Tasks**: Add new tasks with title, description, and due date\n- **Edit Tasks**: Modify existing tasks with inline editing\n- **Delete Tasks**: Remove tasks with confirmation dialog\n- **Toggle Completion**: Mark tasks as complete/incomplete with one click\n\n### User Experience\n- **Task Statistics**: View total, incomplete, and completed task counts\n- **Visual Feedback**: Different styling for completed vs incomplete tasks\n- **Loading States**: Clear feedback during API operations\n- **Error Handling**: Comprehensive error messages and validation\n\n### Form Validation\n- **Title**: Required, minimum 3 characters\n- **Description**: Required\n- **Due Date**: Required, cannot be in the past (for incomplete tasks)\n- **Real-time Validation**: Clear errors as user t

## Step 5: Support Multiple Markdown Types
- To include .mdx files (React markdown):



In [7]:
for file_info in zf.infolist():
    filename = file_info.filename.lower()

    if not (filename.endswith('.md') or filename.endswith('.mdx')):
        continue


## Step 6: Complete Reusable Function
- Here's the production-ready version with error handling:

In [22]:
import io
import zipfile
import requests
import frontmatter

def read_repo_data(repo_owner, repo_name):
    """
    Download and parse all markdown files from a GitHub repository.

    Args:
        repo_owner: GitHub username or organization
        repo_name: Repository name

    Returns:
        List of dictionaries containing file content and metadata
    """
    prefix = 'https://codeload.github.com'
    url = f'{prefix}/{repo_owner}/{repo_name}/zip/refs/heads/main'
    resp = requests.get(url)

    if resp.status_code != 200:
        raise Exception(f"Failed to download repository: {resp.status_code}")

    repository_data = []
    zf = zipfile.ZipFile(io.BytesIO(resp.content))

    for file_info in zf.infolist():
        filename = file_info.filename
        filename_lower = filename.lower()

        if not (filename_lower.endswith('.md') or filename_lower.endswith('.mdx')):
            continue

        try:
            with zf.open(file_info) as f_in:
                content = f_in.read().decode('utf-8', errors='ignore')
                post = frontmatter.loads(content)
                data = post.to_dict()
                data['filename'] = filename
                repository_data.append(data)
        except Exception as e:
            print(f"Error processing {filename}: {e}")
            continue

    zf.close()
    return repository_data

## Step 7: Use the Function

In [23]:
# Download and process different repositories
task_manager_app_docs = read_repo_data('fsamura01', 'task-manager-app')
print(task_manager_app_docs[0])

{'content': '# Task Manager App\n\nA React-based task management application that allows users to create, edit, delete, and track tasks with due dates and completion status.\n\n## Features\n\n### Task Management\n- **Create Tasks**: Add new tasks with title, description, and due date\n- **Edit Tasks**: Modify existing tasks with inline editing\n- **Delete Tasks**: Remove tasks with confirmation dialog\n- **Toggle Completion**: Mark tasks as complete/incomplete with one click\n\n### User Experience\n- **Task Statistics**: View total, incomplete, and completed task counts\n- **Visual Feedback**: Different styling for completed vs incomplete tasks\n- **Loading States**: Clear feedback during API operations\n- **Error Handling**: Comprehensive error messages and validation\n\n### Form Validation\n- **Title**: Required, minimum 3 characters\n- **Description**: Required\n- **Due Date**: Required, cannot be in the past (for incomplete tasks)\n- **Real-time Validation**: Clear errors as user t

## Step 8: Inspect the Data

In [24]:
# Look at the first document
print(task_manager_app_docs)

[{'content': '# Task Manager App\n\nA React-based task management application that allows users to create, edit, delete, and track tasks with due dates and completion status.\n\n## Features\n\n### Task Management\n- **Create Tasks**: Add new tasks with title, description, and due date\n- **Edit Tasks**: Modify existing tasks with inline editing\n- **Delete Tasks**: Remove tasks with confirmation dialog\n- **Toggle Completion**: Mark tasks as complete/incomplete with one click\n\n### User Experience\n- **Task Statistics**: View total, incomplete, and completed task counts\n- **Visual Feedback**: Different styling for completed vs incomplete tasks\n- **Loading States**: Clear feedback during API operations\n- **Error Handling**: Comprehensive error messages and validation\n\n### Form Validation\n- **Title**: Required, minimum 3 characters\n- **Description**: Required\n- **Due Date**: Required, cannot be in the past (for incomplete tasks)\n- **Real-time Validation**: Clear errors as user 

## Today’s Tasks (Day 2)

### 1. Simple Chunking

In [26]:
def sliding_window(seq, size, step):
    if size <= 0 or step <= 0:
        raise ValueError("size and step must be positive")

    n = len(seq)
    result = []
    for i in range(0, n, step):
        chunk = seq[i:i+size]
        result.append({'start': i, 'chunk': chunk})
        if i + size >= n:
            break

    return result

In [28]:
task_manager_app_chunks = []

for doc in task_manager_app_docs:
    doc_copy = doc.copy()
    doc_content = doc_copy.pop('content')
    chunks = sliding_window(doc_content, 2000, 1000)
    for chunk in chunks:
        chunk.update(doc_copy)
    task_manager_app_chunks.extend(chunks)

In [11]:
task_manager_app_chunks

[{'start': 0,
  'chunk': '# Task Manager App\n\nA React-based task management application that allows users to create, edit, delete, and track tasks with due dates and completion status.\n\n## Features\n\n### Task Management\n- **Create Tasks**: Add new tasks with title, description, and due date\n- **Edit Tasks**: Modify existing tasks with inline editing\n- **Delete Tasks**: Remove tasks with confirmation dialog\n- **Toggle Completion**: Mark tasks as complete/incomplete with one click\n\n### User Experience\n- **Task Statistics**: View total, incomplete, and completed task counts\n- **Visual Feedback**: Different styling for completed vs incomplete tasks\n- **Loading States**: Clear feedback during API operations\n- **Error Handling**: Comprehensive error messages and validation\n\n### Form Validation\n- **Title**: Required, minimum 3 characters\n- **Description**: Required\n- **Due Date**: Required, cannot be in the past (for incomplete tasks)\n- **Real-time Validation**: Clear err

In [29]:
import os

In [30]:
api_key = os.environ.get("GROQ_API_KEY")

In [31]:
if api_key is None:
    print("Error: GROQ_API_KEY environment variable not set.")
else:
    print("API key successfully loaded.")

API key successfully loaded.


In [15]:
#from openai import OpenAI
from groq import Groq

groq_client = Groq(api_key=api_key)

def llm(prompt, model='llama-3.1-8b-instant'): # Updated to a currently supported Groq model
    messages = [
        {"role": "user", "content": prompt}
    ]

    response = groq_client.chat.completions.create(
        model=model,
        messages=messages
    )

    return response.choices[0].message.content

In [16]:
prompt_template = """
Split the provided document into logical sections
that make sense for a Q&A system.

Each section should be self-contained and cover
a specific topic or concept.

<DOCUMENT>
{document}
</DOCUMENT>

Use this format:

## Section Name

Section content with all relevant details

---

## Another Section Name

Another section content

---
""".strip()

In [17]:
def intelligent_chunking(text):
    prompt = prompt_template.format(document=text)
    response = llm(prompt)
    sections = response.split('---')
    sections = [s.strip() for s in sections if s.strip()]
    return sections

In [18]:
from tqdm.auto import tqdm

task_manager_app_chunks = []

# Maximum characters allowed in doc_content for a single LLM call
# This is a heuristic to prevent 'Payload Too Large' errors (HTTP 413).
# A conservative estimate of 20000 characters for the document content
# aims to keep the total prompt (document + instructions) well within API limits.
MAX_DOC_CHARS_FOR_LLM = 20000

for doc in tqdm(task_manager_app_docs):
    doc_copy = doc.copy()
    doc_content = doc_copy.pop('content')

    if not doc_content.strip():
        # Skip empty documents to avoid sending empty prompts
        continue

    if len(doc_content) > MAX_DOC_CHARS_FOR_LLM:
        print(f"Skipping intelligent chunking for '{doc_copy.get('filename', 'Unknown')}' "
              f"because its content is too large ({len(doc_content)} chars > {MAX_DOC_CHARS_FOR_LLM} chars). "
              "Consider pre-chunking large documents with a simpler method before LLM processing.")
        continue

    sections = intelligent_chunking(doc_content)
    for section in sections:
        section_doc = doc_copy.copy()
        section_doc['section'] = section
        task_manager_app_chunks.append(section_doc)

  0%|          | 0/3 [00:00<?, ?it/s]

In [19]:
task_manager_app_chunks

[{'filename': 'task-manager-app-main/README.md',
  'section': '## Project Overview\n\nThe Task Manager App is a React-based application that allows users to create, edit, delete, and track tasks with due dates and completion status. It has a user-friendly interface with real-time validation, error handling, and a clear loading state.'},
 {'filename': 'task-manager-app-main/README.md',
  'section': '## Features\n\n### Task Management\n\nThe app offers key features for efficient task management:\n\n*   Create Tasks: Users can add new tasks with title, description, and due date.\n*   Edit Tasks: Users can modify existing tasks using inline editing.\n*   Delete Tasks: Users can remove tasks with a confirmation dialog.\n*   Toggle Completion: Users can mark tasks as complete or incomplete with a single click.\n\n### User Experience\n\nThe app prioritizes user experience with features such as:\n\n*   Task Statistics: Users can view total, incomplete, and completed task counts.\n*   Visual Fe

## Day 3: Add Search

### 1. Text search

In [20]:
task_manager_app_docs = read_repo_data('fsamura01', 'task-manager-app')

In [21]:
task_manager_app_chunks = []

for doc in task_manager_app_docs:
    doc_copy = doc.copy()
    doc_content = doc_copy.pop('content')
    chunks = sliding_window(doc_content, 2000, 1000)
    for chunk in chunks:
        chunk.update(doc_copy)
    task_manager_app_chunks.extend(chunks)

In [22]:
task_manager_app_chunks

[{'start': 0,
  'chunk': '# Task Manager App\n\nA React-based task management application that allows users to create, edit, delete, and track tasks with due dates and completion status.\n\n## Features\n\n### Task Management\n- **Create Tasks**: Add new tasks with title, description, and due date\n- **Edit Tasks**: Modify existing tasks with inline editing\n- **Delete Tasks**: Remove tasks with confirmation dialog\n- **Toggle Completion**: Mark tasks as complete/incomplete with one click\n\n### User Experience\n- **Task Statistics**: View total, incomplete, and completed task counts\n- **Visual Feedback**: Different styling for completed vs incomplete tasks\n- **Loading States**: Clear feedback during API operations\n- **Error Handling**: Comprehensive error messages and validation\n\n### Form Validation\n- **Title**: Required, minimum 3 characters\n- **Description**: Required\n- **Due Date**: Required, cannot be in the past (for incomplete tasks)\n- **Real-time Validation**: Clear err

In [23]:
from minsearch import Index

index = Index(
    text_fields=["chunk", "filename"],
    keyword_fields=[]
)

index.fit(task_manager_app_chunks)

<minsearch.minsearch.Index at 0x1d0d2d9ae90>

In [24]:
query = 'Overview of the task manager app'
results = index.search(query)

### 2. Vector search

In [25]:
from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer('multi-qa-distilbert-cos-v1')

In [26]:
record = task_manager_app_chunks[2]

In [27]:
text = record['chunk']
v_doc = embedding_model.encode(text)

In [28]:
from tqdm.auto import tqdm
from minsearch import VectorSearch
import numpy as np

task_manager_embeddings = []

for d in tqdm(task_manager_app_chunks):
    v = embedding_model.encode(d['chunk'])
    task_manager_embeddings.append(v)

task_manager_embeddings = np.array(task_manager_embeddings)

task_manager_vindex = VectorSearch()
task_manager_vindex.fit(task_manager_embeddings, task_manager_app_chunks)


  0%|          | 0/6 [00:00<?, ?it/s]

<minsearch.vector.VectorSearch at 0x1d0dff6fe90>

In [29]:
query = 'features of the task manager app'
q = embedding_model.encode(query)
results = task_manager_vindex.search(q)
results

[{'start': 0,
  'chunk': '# Task Manager App\n\nA React-based task management application that allows users to create, edit, delete, and track tasks with due dates and completion status.\n\n## Features\n\n### Task Management\n- **Create Tasks**: Add new tasks with title, description, and due date\n- **Edit Tasks**: Modify existing tasks with inline editing\n- **Delete Tasks**: Remove tasks with confirmation dialog\n- **Toggle Completion**: Mark tasks as complete/incomplete with one click\n\n### User Experience\n- **Task Statistics**: View total, incomplete, and completed task counts\n- **Visual Feedback**: Different styling for completed vs incomplete tasks\n- **Loading States**: Clear feedback during API operations\n- **Error Handling**: Comprehensive error messages and validation\n\n### Form Validation\n- **Title**: Required, minimum 3 characters\n- **Description**: Required\n- **Due Date**: Required, cannot be in the past (for incomplete tasks)\n- **Real-time Validation**: Clear err

### 3. Hybrid search

In [30]:
def text_search(query):
    return index.search(query, num_results=5)

def vector_search(query):
    q = embedding_model.encode(query)
    return task_manager_vindex.search(q, num_results=5)

def hybrid_search(query):
    text_results = text_search(query)
    vector_results = vector_search(query)
    
    # Combine and deduplicate results
    seen_ids = set()
    combined_results = []

    for result in text_results + vector_results:
        if result['filename'] not in seen_ids:
            seen_ids.add(result['filename'])
            combined_results.append(result)
    
    return combined_results

In [31]:
query = 'features of the task manager app'
result = hybrid_search(query)

In [32]:
result

[{'start': 1000,
  'chunk': 'istic Updates**: Immediate UI updates for better UX\n- **Auto-refresh**: Tasks update instantly after creation/editing\n- **Conflict Detection**: Handles concurrent edits gracefully\n\n## Technical Details\n\n### Components\n- **App**: Main application component with task list and state management\n- **TaskCreationForm**: Form for creating new tasks\n- **TaskEditForm**: Inline editing component for existing tasks\n\n### API Integration\n- RESTful API calls to `http://localhost:5000/api/tasks`\n- Supports GET, POST, PUT, and DELETE operations\n- Proper error handling and status code responses\n\n### State Management\n- React hooks (useState, useEffect) for local state\n- Optimistic updates for immediate user feedback\n- Separation of concerns between form and display logic\n\n## Installation\n\n1. Clone the repository\n2. Install dependencies: `npm install`\n3. Start the development server: `npm start`\n4. Ensure backend API is running on `http://localhost:5

### Step 1: Load Your Data

In [33]:
task_manager_app_docs = read_repo_data('fsamura01', 'task-manager-app')

In [34]:
# Your existing chunked data
print(f"Loaded {len(task_manager_app_chunks)} documentation chunks")

# Preview categories
categories = set(chunk.get('category', 'Unknown') for chunk in task_manager_app_chunks)
print(f"Categories: {categories}")

topics = set(chunk.get('topic', 'Unknown') for chunk in task_manager_app_chunks)
print(f"Topics: {topics}")

Loaded 6 documentation chunks
Categories: {'Unknown'}
Topics: {'Unknown'}


In [35]:
task_manager_app_chunks = []

# Maximum characters allowed in doc_content for a single LLM call
# This is a heuristic to prevent 'Payload Too Large' errors (HTTP 413).
# A conservative estimate of 20000 characters for the document content
# aims to keep the total prompt (document + instructions) well within API limits.
MAX_DOC_CHARS_FOR_LLM = 20000

for doc in tqdm(task_manager_app_docs):
    doc_copy = doc.copy()
    doc_content = doc_copy.pop('content')

    if not doc_content.strip():
        # Skip empty documents to avoid sending empty prompts
        continue

    if len(doc_content) > MAX_DOC_CHARS_FOR_LLM:
        print(f"Skipping intelligent chunking for '{doc_copy.get('filename', 'Unknown')}' "
              f"because its content is too large ({len(doc_content)} chars > {MAX_DOC_CHARS_FOR_LLM} chars). "
              "Consider pre-chunking large documents with a simpler method before LLM processing.")
        continue

    sections = intelligent_chunking(doc_content)
    for section in sections:
        section_doc = doc_copy.copy()
        section_doc['section'] = section
        task_manager_app_chunks.append(section_doc)

  0%|          | 0/3 [00:00<?, ?it/s]

In [38]:
task_manager_app_chunks

[{'filename': 'task-manager-app-main/README.md',
  'section': '## Table of Contents\n\n*   [Task Manager App Overview](#task-manager-app-overview)\n*   [Features](#features)\n    *   [Task Management](#task-management)\n    *   [User Experience](#user-experience)\n    *   [Form Validation](#form-validation)\n    *   [Data Management](#data-management)\n*   [Technical Details](#technical-details)\n    *   [Components](#components)\n    *   [API Integration](#api-integration)\n    *   [State Management](#state-management)\n*   [Installation and Usage](#installation-and-usage)\n    *   [Task Manager App Installation](#task-manager-app-installation)\n    *   [Task Manager App Usage](#task-manager-app-usage)\n*   [API Requirements](#api-requirements)\n*   [Task Manager API](#task-manager-api)\n    *   [Features](#task-manager-api-features)\n    *   [Data Management](#task-manager-api-data-management)\n    *   [API Endpoints](#task-manager-api-endpoints)\n    *   [Database Schema](#task-mana

In [39]:
# Create text search index
text_search = Index(
    text_fields=["chunk", "category", "topic", "filename", "section"],
    keyword_fields=[]
)

# Index your documentation
text_search.fit(task_manager_app_chunks)
print(f"✓ Indexed {len(task_manager_app_chunks)} chunks")

✓ Indexed 11 chunks


### Step 2: Text Search (Fastest - Start Here!)

In [41]:
def search_text(query, num_results=3):
    """Quick text search function."""
    results = text_search.search(query, num_results=num_results)
    
    print(f"\nQuery: '{query}'")
    print(f"Found {len(results)} results\n")
    
    for i, result in enumerate(results, 1):
        print(f"--- Result {i} ---")
        print(f"Category: {result.get('category', 'N/A')}")
        print(f"Topic: {result.get('topic', 'N/A')}")
        print(f"Content: {result['section'][:200]}...\n")
    
    return results

# Try these queries!
search_text("How do I create a task?")
search_text("What are the API endpoints?")
search_text("database schema")
search_text("validation rules")


Query: 'How do I create a task?'
Found 3 results

--- Result 1 ---
Category: N/A
Topic: N/A
Content: ## Installation and Usage

### Task Manager App Installation

1. Clone the repository
2. Install dependencies: `npm install`
3. Start the development server: `npm start`
4. Ensure backend API is runni...

--- Result 2 ---
Category: N/A
Topic: N/A
Content: ## API Requirements

The app expects a backend API with these endpoints:
- `GET /api/tasks` - Retrieve all tasks
- `POST /api/tasks` - Create new task
- `GET /api/tasks/:id` - Get specific task
- `PUT...

--- Result 3 ---
Category: N/A
Topic: N/A
Content: ## Table of Contents

*   [Task Manager App Overview](#task-manager-app-overview)
*   [Features](#features)
    *   [Task Management](#task-management)
    *   [User Experience](#user-experience)
    ...


Query: 'What are the API endpoints?'
Found 3 results

--- Result 1 ---
Category: N/A
Topic: N/A
Content: ## API Requirements

The app expects a backend API with these endpoints:
- 

[{'filename': 'task-manager-app-main/README.md',
  'section': '## Task Manager API\n\n### Features\n\n- **Create Tasks**: Add new tasks with title, description, due date, and user assignment\n- **View Tasks**: Get all tasks or retrieve specific tasks by ID\n- **Update Tasks**: Edit task details with optimistic concurrency control\n- **Delete Tasks**: Remove tasks with validation and error handling\n- **Task Completion**: Mark tasks as complete/incomplete with automatic timestamp tracking\n\n### Data Management\n\n- **User Integration**: Tasks are linked to users with name and email display\n- **Timestamp Tracking**: Automatic creation and update timestamps\n- **Data Validation**: Server-side validation for all task fields\n- **Sanitization**: Input cleaning and length validation\n\n### API Endpoints\n\n| Method | Endpoint | Description |\n|'},
 {'filename': 'task-manager-app-main/README.md',
  'section': '## Table of Contents\n\n*   [Task Manager App Overview](#task-manager-app-overvie

### Step 3: Vector Search (Semantic Understanding)

In [43]:
# Create embeddings for all chunks
embeddings = []

for chunk in tqdm(task_manager_app_chunks, desc="Creating embeddings"):
    # Combine category, topic, and content for better context
    text = f"{chunk.get('category', '')} {chunk.get('topic', '')} {chunk['section']}"
    v = embedding_model.encode(text)
    embeddings.append(v)

embeddings = np.array(embeddings)
print(f"✓ Created embeddings: shape {embeddings.shape}")

# Build vector search index
vector_search = VectorSearch()
vector_search.fit(embeddings, task_manager_app_chunks)
print("✓ Vector search ready!")

Creating embeddings:   0%|          | 0/11 [00:00<?, ?it/s]

✓ Created embeddings: shape (11, 768)
✓ Vector search ready!


In [45]:
def search_vector(query, num_results=3):
    """Quick vector search function."""
    q = embedding_model.encode(query)
    results = vector_search.search(q, num_results=num_results)
    
    print(f"\nQuery: '{query}'")
    print(f"Found {len(results)} results\n")
    
    for i, result in enumerate(results, 1):
        print(f"--- Result {i} ---")
        print(f"Category: {result.get('category', 'N/A')}")
        print(f"Topic: {result.get('topic', 'N/A')}")
        print(f"Content: {result['section'][:200]}...\n")
    
    return results

# Try paraphrased queries (vector search handles these better!)
search_vector("How can I add a new task to the system?")
search_vector("What REST API calls are available?")
search_vector("Tell me about the data structure")


Query: 'How can I add a new task to the system?'
Found 3 results

--- Result 1 ---
Category: N/A
Topic: N/A
Content: ## Task Manager API

### Features

- **Create Tasks**: Add new tasks with title, description, due date, and user assignment
- **View Tasks**: Get all tasks or retrieve specific tasks by ID
- **Update ...

--- Result 2 ---
Category: N/A
Topic: N/A
Content: ## Installation and Usage

### Task Manager App Installation

1. Clone the repository
2. Install dependencies: `npm install`
3. Start the development server: `npm start`
4. Ensure backend API is runni...

--- Result 3 ---
Category: N/A
Topic: N/A
Content: ## Features

### Task Management

- **Create Tasks**: Add new tasks with title, description, and due date
- **Edit Tasks**: Modify existing tasks with inline editing
- **Delete Tasks**: Remove tasks w...


Query: 'What REST API calls are available?'
Found 3 results

--- Result 1 ---
Category: N/A
Topic: N/A
Content: ## Technical Details

### Components

- **App**: Mai

[{'filename': 'task-manager-app-main/README.md',
  'section': '## Table of Contents\n\n*   [Task Manager App Overview](#task-manager-app-overview)\n*   [Features](#features)\n    *   [Task Management](#task-management)\n    *   [User Experience](#user-experience)\n    *   [Form Validation](#form-validation)\n    *   [Data Management](#data-management)\n*   [Technical Details](#technical-details)\n    *   [Components](#components)\n    *   [API Integration](#api-integration)\n    *   [State Management](#state-management)\n*   [Installation and Usage](#installation-and-usage)\n    *   [Task Manager App Installation](#task-manager-app-installation)\n    *   [Task Manager App Usage](#task-manager-app-usage)\n*   [API Requirements](#api-requirements)\n*   [Task Manager API](#task-manager-api)\n    *   [Features](#task-manager-api-features)\n    *   [Data Management](#task-manager-api-data-management)\n    *   [API Endpoints](#task-manager-api-endpoints)\n    *   [Database Schema](#task-mana

### Step 4: Hybrid Search (Best of Both Worlds!)

In [47]:
def search_hybrid(query, num_results=5):
    """
    Hybrid search combining text and vector methods.
    Returns more comprehensive results!
    """
    # Get results from both methods
    text_results = text_search.search(query, num_results=num_results)
    
    q = embedding_model.encode(query)
    vector_results = vector_search.search(q, num_results=num_results)
    
    # Deduplicate
    seen = set()
    combined = []
    
    for result in text_results + vector_results:
        key = (result['section'], result['filename'])
        if key not in seen:
            seen.add(key)
            combined.append(result)
    
    print(f"\nQuery: '{query}'")
    print(f"Text found: {len(text_results)}, Vector found: {len(vector_results)}")
    print(f"Combined unique: {len(combined)}\n")
    
    for i, result in enumerate(combined[:num_results], 1):
        print(f"--- Result {i} ---")
        print(f"Category: {result.get('category', 'N/A')}")
        print(f"Topic: {result.get('topic', 'N/A')}")
        print(f"Content: {result['section'][:200]}...\n")
    
    return combined

# Try hybrid search!
search_hybrid("How do I set up the backend server?")
search_hybrid("What React components are available?")
search_hybrid("Tell me about error handling")


Query: 'How do I set up the backend server?'
Text found: 5, Vector found: 5
Combined unique: 8

--- Result 1 ---
Category: N/A
Topic: N/A
Content: ## Installation and Usage

### Task Manager App Installation

1. Clone the repository
2. Install dependencies: `npm install`
3. Start the development server: `npm start`
4. Ensure backend API is runni...

--- Result 2 ---
Category: N/A
Topic: N/A
Content: -|
| GET | `/api/tasks` | Get all tasks with user information |
| GET | `/api/tasks/:id` | Get specific task by ID |
| POST | `/api/tasks` | Create new task |
| PUT | `/api/tasks/:id` | Update existin...

--- Result 3 ---
Category: N/A
Topic: N/A
Content: ## API Requirements

The app expects a backend API with these endpoints:
- `GET /api/tasks` - Retrieve all tasks
- `POST /api/tasks` - Create new task
- `GET /api/tasks/:id` - Get specific task
- `PUT...

--- Result 4 ---
Category: N/A
Topic: N/A
Content: ## Task Manager API

### Features

- **Create Tasks**: Add new tasks with title, des

[{'filename': 'task-manager-app-main/README.md',
  'section': '## Features\n\n### Task Management\n\n- **Create Tasks**: Add new tasks with title, description, and due date\n- **Edit Tasks**: Modify existing tasks with inline editing\n- **Delete Tasks**: Remove tasks with confirmation dialog\n- **Toggle Completion**: Mark tasks as complete/incomplete with one click\n\n### User Experience\n\n- **Task Statistics**: View total, incomplete, and completed task counts\n- **Visual Feedback**: Different styling for completed vs incomplete tasks\n- **Loading States**: Clear feedback during API operations\n- **Error Handling**: Comprehensive error messages and validation\n\n### Form Validation\n\n- **Title**: Required, minimum 3 characters\n- **Description**: Required\n- **Due Date**: Required, cannot be in the past (for incomplete tasks)\n- **Real-time Validation**: Clear errors as user types\n\n### Data Management\n\n- **Optimistic Updates**: Immediate UI updates for better UX\n- **Auto-refres

In [45]:
def compare_all(query):
    """Compare all three search methods side-by-side."""
    print("="*70)
    print(f"QUERY: '{query}'")
    print("="*70)
    
    # Text search
    print("\n--- TEXT SEARCH ---")
    text_results = text_search.search(query, num_results=3)
    for i, r in enumerate(text_results, 1):
        print(f"{i}. [{r.get('category')}] {r['chunk'][:80]}...")
    
    # Vector search
    print("\n--- VECTOR SEARCH ---")
    q = embedding_model.encode(query)
    vector_results = vector_search.search(q, num_results=3)
    for i, r in enumerate(vector_results, 1):
        print(f"{i}. [{r.get('category')}] {r['chunk'][:80]}...")
    
    # Hybrid search
    print("\n--- HYBRID SEARCH ---")
    hybrid_results = search_hybrid(query, num_results=3)

# Test comparison
compare_all("How do I validate task inputs?")
compare_all("What error codes does the API return?")

QUERY: 'How do I validate task inputs?'

--- TEXT SEARCH ---
1. [None]  at the top with title, description, and due date
2. **Edit a Task**: Click the ...
2. [None] istic Updates**: Immediate UI updates for better UX
- **Auto-refresh**: Tasks up...
3. [None] ic tasks by ID
- **Update Tasks**: Edit task details with optimistic concurrency...

--- VECTOR SEARCH ---
1. [None] ic tasks by ID
- **Update Tasks**: Edit task details with optimistic concurrency...
2. [None] # Task Manager App

A React-based task management application that allows users ...
3. [None]  at the top with title, description, and due date
2. **Edit a Task**: Click the ...

--- HYBRID SEARCH ---

Query: 'How do I validate task inputs?'
Text found: 3, Vector found: 3
Combined unique: 4

--- Result 1 ---
Category: N/A
Topic: N/A
Content:  at the top with title, description, and due date
2. **Edit a Task**: Click the "Edit" button on any task to modify it
3. **Complete a Task**: Click "Mark Complete" to toggle completion 