# Utilities for LearnMateAI

This notebook contains the PDF reading and text-extraction utilities used in the LearnMateAI project. You can run and test the cells interactively here.


In [None]:
import io
from typing import Union
import fitz  # PyMuPDF

class DocumentReader:
    """Minimal PDF text extractor using PyMuPDF.

    Args:
        file_obj: Either a path to a PDF file, or an ``io.BytesIO`` buffer
            holding the raw PDF bytes.
    """

    def __init__(self, file_obj: Union[str, io.BytesIO]):
        self.file_obj = file_obj

    def extract_text(self) -> str:
        """Return the plain text of all pages, joined with newlines.

        Pages that produce no text are skipped, so a document without any
        extractable text yields an empty string.

        Returns:
            The concatenated page text, one page per ``"\\n"``-separated chunk.
        """
        if isinstance(self.file_obj, io.BytesIO):
            # Rewind first so the full buffer is read even if it was
            # partially consumed before being handed to this reader.
            self.file_obj.seek(0)
            doc = fitz.open(stream=self.file_obj.read(), filetype="pdf")
        else:
            doc = fitz.open(self.file_obj)

        # The document is a context manager: leaving the block closes it, so
        # the underlying file handle / buffer is released even if a page fails.
        with doc:
            page_texts = [page.get_text("text") for page in doc]

        return "\n".join(text for text in page_texts if text)



## Example: Reading Text from a PDF File
The cell below shows how to use the `DocumentReader` class.

> **Note:** For demonstration, test with a local PDF file or a BytesIO buffer.


In [None]:
# Example usage (uncomment to run with your own test PDF)
# reader = DocumentReader('sample.pdf')
# text = reader.extract_text()
# print(text[:1000])  # Print first 1000 characters

