# P-Chat 🔒💬

A privacy-focused bring-your-own-document (BYOD) solution that lets you use LLMs to interact with your own documents. Nothing is persisted: the indexed documents and their embeddings exist only in ephemeral memory.

## Features
- Parent-child chunking used to enrich the context
- Chunk augmentation with some parent data for structured documents
- Streamed responses for better user experience
- Secure by design; no data is stored permanently
- Uses locally-running Ollama for total privacy

In [None]:
# Install/upgrade the LangChain integration packages imported below.
# NOTE(review): gradio is imported later but is not installed by this cell —
# presumably it is already present in the environment; confirm.
%pip install -qU langchain_ollama langchain_chroma langchain_community

In [None]:
import logging
import sys
from pathlib import Path
from enum import StrEnum

import gradio as gr
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain.storage import InMemoryStore
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader

In [None]:
# Dedicated logger for the RAG pipeline; records from DEBUG upwards are emitted.
logger = logging.getLogger('rag')
logger.setLevel(logging.DEBUG)

# Attach the stdout handler only when none is registered yet, so re-running
# this cell does not add a second handler and duplicate every log line.
if len(logger.handlers) == 0:
    formatter = logging.Formatter(
        fmt='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    )
    handler = logging.StreamHandler(stream=sys.stdout)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

## RAG Pipeline

In [None]:
def pretty_print(l: list[Document | tuple[Document, float]]):
    """Write each document (optionally paired with a score) to the debug log.

    Args:
        l: Documents, or ``(document, score)`` tuples. Entries are numbered
            starting at 1 in the output.
    """
    separator = '-' * 80 + '\n'

    for position, entry in enumerate(l, start=1):
        logger.debug(separator)

        # Scored entries get an extra leading "Score:" line; everything else
        # in the output is shared between the two shapes.
        if isinstance(entry, tuple):
            document, score = entry
            score_line = f'Score: {score}\n'
        else:
            document, score_line = entry, ''

        logger.debug(f'{position}. characters: {len(document.page_content)}\n')
        logger.debug(f'{score_line}Metadata: {document.metadata}\nContent: {document.page_content}')

### Indexing


In [None]:
# Names of the Ollama models used for chat completion and for embeddings.
model_id = 'qwen3:0.6b'
embedding_model = 'nomic-embed-text:latest'

# Embedding client (used by the vector store) and the chat model (used by `ask`).
embeddings = OllamaEmbeddings(model=embedding_model)
model = ChatOllama(model=model_id, temperature=0.1)

# Vector store holding the child chunks.
# NOTE(review): no persist directory is passed — presumably this keeps the
# collection in memory only, matching the "nothing is persisted" claim; confirm.
vectorstore = Chroma(
    collection_name='p-chat',
    embedding_function=embeddings,
)
# In-memory key/value store holding the full parent documents, keyed by their id.
docstore = InMemoryStore()

class Metadata(StrEnum):
    """Keys written into ``Document.metadata`` by the indexing code."""

    # Unique identifier: a UUID string for parent documents,
    # '<parent id>-<n>' for the chunks derived from them.
    ID = 'id'
    # On a chunk, the id of the parent document it was split from.
    PARENT_ID = 'parent_id'
    # File name of the source document (directories stripped by `preprocess`).
    SOURCE = 'source'
    # File type derived from the source file name's extension.
    FILE_TYPE = 'file_type'


# Maps a file extension (including the leading dot) to the loader class that
# `load_documents` instantiates for it. Extensions not listed here are skipped.
LOADER_MAPPING = {
    '.md': TextLoader,
    '.txt': TextLoader,
}

def load_documents(file_path: Path) -> list[Document]:
    """Load a file with the loader registered for its extension.

    Args:
        file_path: Path of the file to load.

    Returns:
        The documents produced by the loader, or an empty list when no loader
        is configured in ``LOADER_MAPPING`` for the file's extension.
    """
    # LOADER_MAPPING keys are lower-case, so normalise the suffix; otherwise
    # files such as 'NOTES.MD' or 'a.TXT' would be silently skipped.
    extension = file_path.suffix.lower()
    logger.info(f'Loading loader for {extension}')
    loader_cls = LOADER_MAPPING.get(extension)

    if loader_cls is None:
        logger.warning(f'No loader configured for {extension}')
        return []

    documents = loader_cls(file_path).load()
    logger.info(f'{len(documents)} loaded for {file_path.name}')

    return documents


def preprocess(documents: list[Document]) -> list[Document]:
    """Assign an id and normalised source metadata to each document, in place.

    For every document this sets:

    * ``Metadata.ID`` to a fresh UUID string,
    * ``Metadata.SOURCE`` to the file name of the original ``source`` path,
    * ``Metadata.FILE_TYPE`` to the lower-cased extension including the
      leading dot (e.g. ``'.md'``), the same form used as keys in
      ``LOADER_MAPPING`` and compared against in ``chunk_documents``. It is an
      empty string when the file name has no extension.

    Args:
        documents: Documents to annotate; their ``metadata`` dicts are mutated.

    Returns:
        The same list that was passed in.
    """
    import uuid

    for doc in documents:
        metadata = doc.metadata
        # A missing/empty 'source' yields an empty name and file type instead
        # of raising AttributeError on None.
        source_path = Path(str(metadata.get('source') or ''))

        metadata[Metadata.ID] = str(uuid.uuid4())
        metadata[Metadata.SOURCE] = source_path.name
        metadata[Metadata.FILE_TYPE] = source_path.suffix.lower()

    return documents


def index_document(file_path):
    """Load, preprocess, chunk and index a single file.

    Child chunks are added to ``vectorstore``; each parent document is stored
    in ``docstore`` under its ``Metadata.ID`` so it can be looked up from a
    chunk's parent id at query time.

    Args:
        file_path: Path (string or path-like) of the file to index.
    """
    documents = load_documents(Path(file_path))
    preprocessed_docs = preprocess(documents)
    logger.debug([doc.metadata for doc in preprocessed_docs])

    for doc in preprocessed_docs:
        chunks = chunk_documents(doc)

        if not chunks:
            # An empty document produces no chunks. Do not pass an empty batch
            # to the vector store, and do not keep a parent that no chunk can
            # ever lead back to.
            logger.warning(f'No chunks produced for {doc.metadata.get(Metadata.SOURCE)}; skipping')
            continue

        vectorstore.add_documents(chunks)
        docstore.mset([(doc.metadata.get(Metadata.ID), doc)])


def chunk_documents(parent: Document) -> list[Document]:
    """Split a parent document into child chunks that point back to it.

    Markdown documents are split on their headers; everything else is split
    with a recursive character splitter (400 characters, 80 overlap).

    Every child inherits the parent's metadata and additionally gets
    ``Metadata.ID`` set to ``'<parent id>-<n>'`` (n starting at 1) and
    ``Metadata.PARENT_ID`` set to the parent's id. Markdown chunks also carry
    the header metadata produced by the splitter and, when a top-level header
    is present, are prefixed with ``'[Employee: <header>] '``.

    Args:
        parent: The document to split. Its metadata is expected to contain
            ``Metadata.ID`` and ``Metadata.FILE_TYPE``.

    Returns:
        The child documents, in document order.
    """
    # Accept the file type with or without the leading dot and in any case, so
    # both 'md' and '.md' select the markdown splitter.
    file_type = str(parent.metadata.get(Metadata.FILE_TYPE) or '').lower().lstrip('.')

    if file_type == 'md':
        headers_to_split_on = [
            ('#', 'employee_name'),
            ('##', 'section'),
            ('###', 'Header 3'),
        ]
        markdown_splitter = MarkdownHeaderTextSplitter(
            headers_to_split_on=headers_to_split_on
        )
        chunks = markdown_splitter.split_text(parent.page_content)
    else:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=400,
            chunk_overlap=80,
            separators=['\n\n', '\n', ' ', '']
        )
        chunks = text_splitter.split_text(parent.page_content)

    children = []
    parent_id = parent.metadata.get(Metadata.ID)
    for i, chunk in enumerate(chunks, start=1):
        if isinstance(chunk, Document):
            # The markdown splitter returns Documents whose metadata holds the
            # headers the chunk sits under; merge it over the parent's.
            metadata = {**parent.metadata, **chunk.metadata}
            employee_name = metadata.get('employee_name')
            # Only prefix when a top-level header exists, so chunks are never
            # polluted with a literal "[Employee: None]".
            prefix = f'[Employee: {employee_name}] ' if employee_name else ''
            content = prefix + chunk.page_content
        else:
            # The character splitter returns plain strings.
            metadata = parent.metadata.copy()
            content = chunk

        metadata.update({
            Metadata.ID: f'{parent_id}-{i}',
            Metadata.PARENT_ID: parent_id,
        })
        children.append(Document(page_content=content, metadata=metadata))

    logger.debug(f'Number chunks: {len(children)}, Parent ID: {parent_id}')

    return children

### LLM Interaction

In [None]:
def retrieve_context(query) -> str:
    """Build the context for a query from the parents of the best-matching chunks.

    Chunks are retrieved from ``vectorstore`` by similarity; the full parent
    document of each distinct match is then fetched from ``docstore``.

    Args:
        query: The user's question.

    Returns:
        The parents' contents joined by blank lines, in the order their first
        chunk was matched. Empty string when nothing matches.
    """
    results = vectorstore.similarity_search(query)
    logger.info(f'Matching records: {len(results)}')

    # dict.fromkeys removes duplicate parent ids while keeping match order;
    # chunks without a parent id cannot be resolved and are dropped.
    parent_ids = list(dict.fromkeys(
        parent_id
        for parent_id in (result.metadata.get(Metadata.PARENT_ID) for result in results)
        if parent_id is not None
    ))

    # One batched lookup instead of one docstore call per matching chunk.
    selected_parents = dict(zip(parent_ids, docstore.mget(parent_ids)))

    logger.info(f'Selected documents for query: {query} ids:{selected_parents.keys()}')
    # mget returns None for ids that are not in the store; skip those.
    context = '\n\n'.join(doc.page_content for doc in selected_parents.values() if doc is not None)

    return context

        
def ask(message, history):
    """Stream the model's answer to ``message`` using retrieved context.

    Args:
        message: The user's question.
        history: Previous chat turns; accepted but not used.

    Yields:
        The answer accumulated so far, once per streamed piece, skipping
        pieces received while the accumulated text is still empty.
    """
    context = retrieve_context(message)
    prompt = f'''
    You are helpful assistant that answers a question based on the provided context.
    If the context is not helpful to you in answering the question, say so.
    Be concise with your responses.

    Context:
    {context}
    '''

    conversation = [('system', prompt), ('user', message)]

    answer_so_far = ''
    for piece in model.stream(conversation):
        answer_so_far += piece.content or ''
        # Nothing to show until the first non-empty piece has arrived.
        if answer_so_far:
            yield answer_so_far

## Gradio UI

In [None]:
def chat(message, history):
    """Handle one chat turn: index any attached files, then answer the text.

    Args:
        message: Dict with an optional ``'text'`` string and an optional
            ``'files'`` list of file paths, or ``None``.
        history: Previous chat turns, forwarded to ``ask``.

    Yields:
        The progressively growing answer when text was provided; otherwise a
        single status message. Nothing is yielded when ``message`` is ``None``.
    """
    if message is None:
        return

    text_input = (message.get('text') or '').strip()
    files_uploaded = message.get('files') or []

    # Index every attached file, not only the last one.
    for file_path in files_uploaded:
        index_document(file_path)

    if not text_input:
        # Only claim that something was indexed when a file was attached.
        if files_uploaded:
            yield '✅ Indexed document'
        else:
            yield 'Please enter a question or attach a document.'
        return

    yield from ask(text_input, history)

# Page title, shown both as the browser/tab title and as the top heading.
title = 'P-Chat 🔒💬'
with gr.Blocks(title=title, fill_height=True) as ui:
    gr.Markdown(f'# {title}')
    gr.Markdown('## Privacy-focused bring-your-own-document (BYOD) solution 🤫.')

    # Chat interface backed by `chat`. The multimodal textbox lets the user
    # attach text/.txt/.md files alongside (or instead of) a typed question.
    gr.ChatInterface(
        fn=chat,
        type='messages',
        textbox=gr.MultimodalTextbox(file_types=['text', '.txt', '.md'], autofocus=True),
    )

# Start the app.
ui.launch(debug=True)