In [None]:
from pydantic import field_serializer, BaseModel
from github.Commit import Commit
from typing import Any
from pydantic import ConfigDict, field_validator

from seer.automation.autofix.models import IssueDetails, EventDetails

# Pydantic model for one eval item: the `raw_data` dict, a PyGithub `Commit`,
# and the seer `IssueDetails` / `EventDetails` objects.
class EvalItem(BaseModel):
    # `Commit` is not a pydantic type, so arbitrary field types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    raw_data: dict[str, Any]
    commit: Commit
    issue: IssueDetails
    event: EventDetails

    @field_validator('commit', mode="before")
    @classmethod
    def validate_commit(cls, commit: Commit | str):
        """Return a ``Commit`` for the incoming value.

        A value that is already a ``Commit`` is returned unchanged; anything
        else is handed to ``repo.get_commit``.
        """
        # NOTE(review): `repo` is not defined in this cell. It is presumably a
        # PyGithub repository created earlier in the notebook; confirm it is
        # in scope before any item is validated.
        if isinstance(commit, Commit):
            return commit
        return repo.get_commit(commit)

    @field_serializer('commit')
    def serialize_commit(self, commit: Commit, _info):
        """Serialize the commit as its ``sha`` attribute."""
        return commit.sha
    
# `EvalItem` extended with one extra string field, `diff`.
class EvalItemWithDiff(EvalItem):
    # Read back as `item.diff` when the dataset outputs are built.
    diff: str

In [None]:
import json

# Path to the exported eval set, relative to the notebook's working directory.
eval_file = '../data/full_eval_autofix_240314.json'

# Decode explicitly as UTF-8: without `encoding`, open() uses the platform's
# locale encoding, which can mis-decode (or fail on) non-ASCII JSON content.
with open(eval_file, 'r', encoding='utf-8') as file:
    tmp_autofix_data = json.load(file)

# Validate every raw record into an EvalItemWithDiff. A record that does not
# match the model raises here, before anything is uploaded.
eval_data = [EvalItemWithDiff.model_validate(item) for item in tmp_autofix_data]

print(f"Loaded {len(eval_data)} eval items")

Save the dataset to LangSmith:

In [None]:
from langsmith import Client

# Inputs are what the model under test receives, so it knows what to generate:
# each eval item dumped in JSON mode.
# NOTE(review): the dump keeps every field of EvalItemWithDiff, so `diff` is
# present in the inputs as well as in the outputs below. Confirm that is intended.
dataset_inputs = [eval_item.model_dump(mode='json') for eval_item in eval_data]

# Outputs are what the evaluator receives, so it knows what to compare against.
# They are optional but recommended.
dataset_outputs = [{"diff": eval_item.diff} for eval_item in eval_data]

dataset_name = "Autofix Eval Full 240314"
client = Client()

# Keeping the inputs in a dataset lets chains and LLMs be run over one shared
# set of examples.
dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="Autofix full eval made from mapping sentry <-> github commits for sentry project",
)

# Attach all examples to the dataset that was just created.
client.create_examples(
    dataset_id=dataset.id,
    inputs=dataset_inputs,
    outputs=dataset_outputs,
)