In [None]:
!pip install -r ../requirements.txt

In [1]:
import os
# Environment for this notebook session, set before the seer and LangChain imports below:
# a database on localhost plus LangChain tracing for the "ai-autofix-evals" project.
os.environ.update({
    'DATABASE_URL': "postgresql+psycopg://root:seer@localhost:5433/seer",
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': "https://api.smith.langchain.com",
    'LANGCHAIN_PROJECT': "ai-autofix-evals",
})

import sys
# Make the sibling ../src directory importable; abspath resolves the relative path against the current working directory.
sys.path.insert(0, os.path.abspath('../src'))

import logging

# Give the 'autofix' logger exactly one stream handler at DEBUG level.
# Existing handlers are dropped first so re-running the cell does not duplicate output.
logger = logging.getLogger('autofix')
logger.handlers.clear()
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)

from github import Github
from github.Auth import Token

# Authenticate against the GitHub API with the token from the environment and
# resolve the repository that the eval commits are looked up in.
if not os.environ.get('GITHUB_TOKEN'):
    # Fail fast with an actionable message instead of handing None to Token(...).
    raise RuntimeError("GITHUB_TOKEN is not set; export a GitHub access token before running this notebook")

github = Github(auth=Token(token=os.environ['GITHUB_TOKEN']))
repo = github.get_repo('getsentry/sentry')

from seer.bootup import bootup

# Run seer's bootup for this module; the recorded cell output shows it returning a Flask app.
bootup(__name__)

  from .autonotebook import tqdm as notebook_tqdm


<Flask '__main__'>

In [2]:



from pydantic import field_serializer, BaseModel
from tqdm import tqdm
from github.Commit import Commit
from typing import Any, Literal, NotRequired
from typing_extensions import TypedDict
from pydantic import AliasChoices, AliasGenerator, ConfigDict, Field, ValidationError, field_validator
from pydantic.alias_generators import to_camel, to_snake
import sentry_sdk

from seer.automation.autofix.models import IssueDetails, EventDetails

class EvalItem(BaseModel):
    """One evaluation case: a raw record plus its commit, issue and event.

    The ``commit`` field holds a PyGithub ``Commit`` object. It is written out
    as the commit SHA on serialization and may be supplied either as a
    ``Commit`` or as a string that is looked up through the module-level ``repo``.
    """

    # Commit is not a pydantic type, so arbitrary types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    raw_data: dict[str, Any]
    commit: Commit
    issue: IssueDetails
    event: EventDetails

    @field_validator('commit', mode="before")
    @classmethod
    def validate_commit(cls, commit: Commit | str):
        """Pass ``Commit`` objects through; resolve anything else via ``repo.get_commit``."""
        if isinstance(commit, Commit):
            return commit
        return repo.get_commit(commit)

    @field_serializer('commit')
    def serialize_commit(self, commit: Commit, _info):
        """Serialize the commit as its SHA string."""
        return commit.sha
    
class EvalItemWithDiff(EvalItem):
    """An EvalItem that additionally carries a diff as a string."""
    # NOTE(review): presumably the diff of the commit that fixed the issue — confirm where this is populated.
    diff: str

In [3]:
import json

# Location of the JSON file holding the serialized eval items.
eval_file = '../data/full_eval_autofix_240314.json'

# Decode explicitly as UTF-8 so the result does not depend on the platform's default locale encoding.
with open(eval_file, 'r', encoding='utf-8') as file:
    tmp_autofix_data = json.load(file)

# Validate each raw record into an EvalItem. A commit given as a string is
# resolved through repo.get_commit by EvalItem.validate_commit.
eval_data = [EvalItem.model_validate(item) for item in tmp_autofix_data]

print(f"Loaded {len(eval_data)} eval items")

Loaded 36 eval items


In [5]:
from seer.automation.autofix.autofix import Autofix
from seer.automation.autofix.tasks import ContinuationState
from seer.rpc import DummyRpcClient
from seer.automation.autofix.models import (
    AutofixContinuation,
    AutofixRequest,
    IssueDetails,
    RepoDefinition,
)

# Dummy RPC client with its dry_run flag switched on.
# NOTE(review): presumably dry_run keeps the run from issuing real RPC calls — confirm against DummyRpcClient.
rpc_client = DummyRpcClient()
rpc_client.dry_run = True

# Autofix request built from the FIRST eval item only: its commit SHA is the base
# commit and its issue is the one to fix, against the getsentry/sentry GitHub repo.
request = AutofixRequest(
    organization_id=1,
    project_id=1,
    repos=[RepoDefinition(provider="github", owner="getsentry", name="sentry")],
    base_commit_sha=eval_data[0].commit.sha,
    issue=eval_data[0].issue,
)

# Continuation state wrapping the request and sharing the RPC client created above.
# `request` is already an AutofixRequest instance when it is passed to model_validate.
state = ContinuationState(
    val=AutofixContinuation(request=AutofixRequest.model_validate(request)), rpc_client=rpc_client
)

CELERY_BROKER_URL not set


In [6]:
from sentence_transformers import SentenceTransformer
from seer.automation.autofix.autofix_context import AutofixContext
from seer.automation.autofix.event_manager import AutofixEventManager

# Load the embedding model from a directory relative to the notebook.
# NOTE(review): trust_remote_code=True presumably allows custom code shipped with the model to run — confirm the model directory is trusted.
embedding_model = SentenceTransformer('../models/autofix_embeddings_v0', trust_remote_code=True)

# Event manager and context are both bound to the same `state`; the context
# reuses the organization, project and repos from `request`.
event_manager = AutofixEventManager(state)
context = AutofixContext(
    organization_id=request.organization_id,
    project_id=request.project_id,
    repos=request.repos,
    event_manager=event_manager,
    state=state,
    embedding_model=embedding_model
)
# NOTE(review): presumably stops autofix from committing its changes during evaluation — confirm against AutofixContext.
context.commit_changes = False
autofix = Autofix(context)

In [7]:
# Run autofix for `request` and keep the result for later inspection.
# The recorded output shows this step creating a codebase index for the repo
# (11683 documents read) before the run was interrupted.
autofix_result = autofix.invoke(request)

Beginning autofix for issue 5059849041
on_autofix_step_update invoking...
on_autofix_step_update done
on_autofix_step_update invoking...
on_autofix_step_update done
on_autofix_step_update invoking...
on_autofix_step_update done
on_autofix_step_update invoking...
on_autofix_step_update done
Creating codebase index for repo sentry
on_autofix_step_update invoking...
on_autofix_step_update done
Loading repository to /var/folders/c8/ljt8gc_13j30r7lt_p842hrw0000gn/T/getsentry-sentry_21831a3a2935e295dfdcf40f800c77119fd90886m3gdbpjj/repo
Loaded repository to /var/folders/c8/ljt8gc_13j30r7lt_p842hrw0000gn/T/getsentry-sentry_21831a3a2935e295dfdcf40f800c77119fd90886m3gdbpjj/repo
Read 11683 documents:
  markdown: 21
  yaml: 52
  python: 5783
  json: 646
  bash: 14
  typescript: 171
  toml: 1
  tsx: 4765
  javascript: 47
  rst: 9
  html: 162
  css: 2
  lua: 7
  embedded_template: 3
Document chunking took 0.73 seconds
Processed document 1/11683
Document chunking took 0.02 seconds
Processed document 

KeyboardInterrupt: 

In [None]:
# WIP scoring the diffs

from langchain.chat_models.openai import ChatOpenAI
from github.Commit import Commit
from github.File import File

# Chat model that score_fix sends its grading prompt to.
model = ChatOpenAI(model_name="gpt-4-0125-preview")

def score_fix(error_message, stacktrace, expected_solution, expected_diff, predicted_diff):
    """Ask the module-level ``model`` how well a predicted diff fixes an issue.

    Builds a single prompt containing the issue (error message and stacktrace),
    the known-correct solution (description and diff) and the predicted diff,
    then sends it to ``model.invoke``.

    Args:
        error_message: Error message of the issue.
        stacktrace: Stacktrace of the issue.
        expected_solution: Description of the known-correct fix.
        expected_diff: Diff of the known-correct fix.
        predicted_diff: Diff produced by the model under evaluation.

    Returns:
        Whatever ``model.invoke`` returns for the prompt, unmodified. The prompt
        asks for a float from 0 to 1 inside a ``<score>`` tag; extracting that
        value from the response is left to the caller.
    """
    # The response used to be discarded, so every call returned None.
    return model.invoke(f"""<issue>
{error_message}
{stacktrace}
</issue>

Given the above issue, we know the correct fix is:

<expected_solution>
<description>
{expected_solution}
</description>
<changes>
{expected_diff}
</changes>
</expected_solution>

The model predicted the following diff:

<predicted_solution>
{predicted_diff}
</predicted_solution>

Score how well the predicted solution matches the expected solution with a float score from 0 to 1, where 1 means the solution fully fixes the issue and 0 means the solution does not fix the issue at all.
- Consider the context of the issue and the diff
- Consider that there are multiple ways to fix an issue
- Return the score inside a <score> tag.""")


