In [1]:
import docs

In [36]:
def read_github_data(repo_owner, repo_name):
    """Read the files of a GitHub repository via ``docs.GithubRepositoryDataReader``.

    The extension allow-list below is passed to the reader as
    ``allowed_extensions``.

    Args:
        repo_owner: Owner of the repository.
        repo_name: Name of the repository.

    Returns:
        Whatever ``GithubRepositoryDataReader.read()`` returns.
    """
    # "json" is currently disabled in the allow-list.
    wanted = {"md", "py", "ipynb", "yaml", "yml", "Makefile", "Dockerfile", "toml"}

    return docs.GithubRepositoryDataReader(
        repo_owner,
        repo_name,
        allowed_extensions=wanted,
    ).read()

# Repository whose files are loaded into `files`.
repo_owner, repo_name = 'alexeygrigorev', 'data-engineering-rag'

files = read_github_data(repo_owner=repo_owner, repo_name=repo_name)

In [37]:
# Raw filename -> content lookup. Notebooks are stored unconverted here;
# a later cell rebuilds this index with .ipynb files run through the formatter.
file_index = {f.filename: f.content for f in files}

In [38]:
import nbformat
from nbconvert import MarkdownExporter, PythonExporter
from nbconvert.preprocessors import ClearOutputPreprocessor

class NotebookMarkdownFormatter:
    """Converts raw Jupyter notebook JSON to text with cell outputs cleared.

    Note: despite the class name, the exporter configured here is nbconvert's
    ``PythonExporter``, so ``format`` returns Python source, not markdown.
    """

    def __init__(self):
        self.exporter = PythonExporter()
        # Strip cell outputs before export so only the notebook's inputs remain.
        self.exporter.register_preprocessor(ClearOutputPreprocessor(), enabled=True)

    def format(self, raw_notebook: str) -> str:
        """Export a notebook to text using the configured exporter.

        Args:
            raw_notebook: The notebook file content (JSON) as a string.

        Returns:
            The exported body; the resources returned by the exporter are
            discarded.
        """
        # Read as notebook format v4 (converting older notebooks if needed)
        # instead of NO_CONVERT, which would pass pre-v4 notebooks through
        # unconverted to nbconvert.
        nb_parsed = nbformat.reads(raw_notebook, as_version=4)
        exported_body, _ = self.exporter.from_notebook_node(nb_parsed)
        return exported_body

In [39]:
# Single formatter instance, reused for every .ipynb file when building file_index.
notebook_processor = NotebookMarkdownFormatter()

In [40]:
# Rebuild the filename -> content index, this time converting notebooks
# with notebook_processor; every other file is stored as-is.
file_index = {}

for f in files:
    if f.filename.endswith('.ipynb'):
        content = notebook_processor.format(f.content)
    else:
        content = f.content
    file_index[f.filename] = content

In [44]:
import os

class ProjectEvaluationTools:
    """Read / grep / tree helpers over an in-memory ``{filename: content}`` index."""

    def __init__(self, file_index: dict[str, str]):
        self.file_index = file_index

    def read(self, filename):
        """Return the contents of a file, or None if it is not in the index."""
        return self.file_index.get(filename)

    def grep(self, patterns: list[str]):
        """
        Search for all patterns in all files.

        A line matches when it contains at least one pattern as a plain
        substring (no regex). A single string is treated as one pattern.

        Returns: { filename: [matching lines] }
        """
        if isinstance(patterns, str):
            # Iterating a bare string would test every character separately
            # and match almost every line.
            patterns = [patterns]
        else:
            # Materialise once so a one-shot iterable survives the per-line scan.
            patterns = list(patterns)

        results = {}

        for filename, content in self.file_index.items():
            matches = [
                line for line in content.splitlines()
                if any(p in line for p in patterns)
            ]
            if matches:
                results[filename] = matches

        return results

    def tree(self, wd: str = '.'):
        """
        Return a sorted list of files under the given directory.

        ``wd`` is relative to the index root: '', '.', './' (and '/') mean the
        whole index. A leading './' and trailing slashes are ignored, so
        'pkg', 'pkg/' and './pkg' are equivalent.
        """
        prefix = wd
        # Index keys carry no './' prefix, so drop it before matching.
        while prefix.startswith('./'):
            prefix = prefix[2:]
        prefix = prefix.rstrip('/')

        if prefix in ('', '.'):
            return sorted(self.file_index)

        # The trailing slash keeps 'pkg' from also matching 'pkgother/...'.
        prefix += '/'
        return sorted(
            name for name in self.file_index
            if name.startswith(prefix)
        )



In [45]:
# Tool object over the notebook-converted file_index built above.
eval_tools = ProjectEvaluationTools(file_index)

In [46]:
# Sanity check: list every file in the index (default wd='.' means root).
eval_tools.tree()

['README.md',
 'agents.ipynb',
 'data-processing.ipynb',
 'debug.ipynb',
 'evals/analysis.ipynb',
 'pyproject.toml',
 'zc_agent/__init__.py',
 'zc_agent/eval/async_paralell.py',
 'zc_agent/eval/calculate_metrics.py',
 'zc_agent/eval/generate_questions.py',
 'zc_agent/eval/run_agent.py',
 'zc_agent/llm.py',
 'zc_agent/load_data.py',
 'zc_agent/logs.py',
 'zc_agent/main.py',
 'zc_agent/prepare_data.py',
 'zc_agent/prompts/__init__.py',
 'zc_agent/prompts/code_doc.md',
 'zc_agent/prompts/eval_checklist.md',
 'zc_agent/prompts/eval_question_generator.md',
 'zc_agent/prompts/eval_user.md',
 'zc_agent/prompts/notebook_edit.md',
 'zc_agent/prompts/search_agent.md',
 'zc_agent/search_agent.py',
 'zc_agent/search_tools.py']

In [48]:
from pydantic import BaseModel
from typing import Literal, Optional

class EvaluationItem(BaseModel):
    """One scoring option of a criterion: a point value and what earns it."""
    # Points associated with this option.
    points: int
    # Human-readable statement of what this option covers.
    description: str

class EvaluationCriterion(BaseModel):
    """A named evaluation criterion together with its scoring options."""
    # Criterion title.
    name: str
    # Only "single" or "checklist" is accepted.
    # NOTE(review): presumably "single" = pick exactly one item and
    # "checklist" = items are scored independently — confirm against criteria.yaml.
    kind: Literal["single", "checklist"]
    # Scoring options belonging to this criterion.
    items: list[EvaluationItem]
    # Optional free-text note; defaults to None when absent.
    comment: Optional[str] = None

class EvaluationCriteria(BaseModel):
    """Top-level container: the full list of evaluation criteria."""
    # Stored under the top-level "criteria" key of the validated data.
    criteria: list[EvaluationCriterion]

In [49]:
import yaml

In [50]:
# criteria.yaml is resolved relative to the current working directory.
# Explicit UTF-8 so decoding does not depend on the platform's default encoding.
with open('criteria.yaml', 'r', encoding='utf-8') as f_in:
    # safe_load is the shorthand for load(stream, SafeLoader).
    criteria_raw = yaml.safe_load(f_in)

In [54]:
# Validate the parsed YAML data against the EvaluationCriteria model.
criteria = EvaluationCriteria.model_validate(criteria_raw)

In [55]:
# Display the validated criteria (full repr of every criterion and item).
criteria

EvaluationCriteria(criteria=[EvaluationCriterion(name='Dataset', kind='single', items=[EvaluationItem(points=0, description='The project uses the FAQ dataset from https://github.com/DataTalksClub/faq'), EvaluationItem(points=2, description='The project uses an original dataset')], comment=None), EvaluationCriterion(name='Data pipeline', kind='single', items=[EvaluationItem(points=0, description='No data processing pipeline'), EvaluationItem(points=1, description='Basic data loading with minimal processing'), EvaluationItem(points=2, description='Well-structured data pipeline with ingestion, processing, and indexing steps')], comment=None), EvaluationCriterion(name='Agent implementation', kind='single', items=[EvaluationItem(points=0, description='No agent code or agent code is embedded in notebooks'), EvaluationItem(points=1, description='Agent code exists but not well-organized or partially in notebooks'), EvaluationItem(points=2, description='Agent code is modular, reusable, and stor