In [1]:
!pip install -r ../requirements.txt

Collecting openai==1.6.1 (from -r ../requirements.txt (line 28))
  Using cached openai-1.6.1-py3-none-any.whl.metadata (17 kB)
Collecting packaging==23.1 (from -r ../requirements.txt (line 31))
  Using cached packaging-23.1-py3-none-any.whl.metadata (3.1 kB)
Collecting langsmith==0.0.87 (from -r ../requirements.txt (line 94))
  Using cached langsmith-0.0.87-py3-none-any.whl.metadata (10 kB)
Using cached openai-1.6.1-py3-none-any.whl (225 kB)
Using cached packaging-23.1-py3-none-any.whl (48 kB)
Using cached langsmith-0.0.87-py3-none-any.whl (55 kB)
Installing collected packages: packaging, openai, langsmith
  Attempting uninstall: packaging
    Found existing installation: packaging 23.2
    Uninstalling packaging-23.2:
      Successfully uninstalled packaging-23.2
  Attempting uninstall: openai
    Found existing installation: openai 1.14.1
    Uninstalling openai-1.14.1:
      Successfully uninstalled openai-1.14.1
  Attempting uninstall: langsmith
    Found existing installation: lan

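A couple more libraries are needed for running the eval. Note that installing them upgrades `openai`, `langsmith` and `packaging` past the versions pinned in `requirements.txt` (see the pip output below):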

In [2]:
!pip install python-dotenv 'psycopg[binary,pool]' langchain langchain-openai

Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Using cached langsmith-0.1.27-py3-none-any.whl.metadata (13 kB)
Collecting openai<2.0.0,>=1.10.0 (from langchain-openai)
  Using cached openai-1.14.1-py3-none-any.whl.metadata (18 kB)
Collecting packaging<24.0,>=23.2 (from langchain-core<0.2.0,>=0.1.31->langchain)
  Using cached packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Using cached langsmith-0.1.27-py3-none-any.whl (68 kB)
Using cached openai-1.14.1-py3-none-any.whl (257 kB)
Using cached packaging-23.2-py3-none-any.whl (53 kB)
Installing collected packages: packaging, openai, langsmith
  Attempting uninstall: packaging
    Found existing installation: packaging 23.1
    Uninstalling packaging-23.1:
      Successfully uninstalled packaging-23.1
  Attempting uninstall: openai
    Found existing installation: openai 1.6.1
    Uninstalling openai-1.6.1:
      Successfully uninstalled openai-1.6.1
  Attempting uninstall: langsmith
    Found existing installation: langsmith 0.0.

In [3]:
import os

# Notebook-local settings. These are assigned BEFORE load_dotenv() is called;
# python-dotenv does not override variables that already exist unless asked to,
# so the values here take precedence over same-named keys in ../.env.
os.environ['DATABASE_URL'] = "postgresql+psycopg://root:seer@localhost:5433/seer"
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_PROJECT'] = "ai-autofix-evals"

from dotenv import load_dotenv
load_dotenv('../.env')

# Make the project's `src` directory importable (needed for the `seer` imports).
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '../src')))

import logging

# Send DEBUG output of the 'autofix' logger to the notebook. Handlers are reset
# first so re-running this cell does not duplicate every log line.
logger = logging.getLogger('autofix')
logger.setLevel(logging.DEBUG)
logger.handlers = []
logger.addHandler(logging.StreamHandler())

from github import Github
from github.Auth import Token

# Fail fast with an actionable message instead of handing `None` to Token()
# when the credential is missing from both the shell environment and ../.env.
github_token = os.environ.get('GITHUB_TOKEN')
if not github_token:
    raise RuntimeError(
        "GITHUB_TOKEN is not set; add it to ../.env or export it before starting the kernel."
    )

github = Github(auth=Token(token=github_token))
repo = github.get_repo('getsentry/sentry')

from seer.bootup import bootup

bootup(__name__)

  from .autonotebook import tqdm as notebook_tqdm


<Flask '__main__'>

In [4]:
from pydantic import field_serializer, BaseModel
from github.Commit import Commit
from typing import Any
from pydantic import ConfigDict, field_validator

from seer.automation.autofix.models import IssueDetails, EventDetails

class EvalItem(BaseModel):
    """A single evaluation example: a GitHub commit plus issue and event details.

    The `commit` field accepts either a PyGithub `Commit` or a SHA string on
    input, and is written out as its SHA string when the model is serialized.
    """

    # `Commit` is not a pydantic-native type, so arbitrary types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    raw_data: dict[str, Any]
    commit: Commit
    issue: IssueDetails
    event: EventDetails

    @field_validator('commit', mode="before")
    @classmethod
    def validate_commit(cls, commit: Commit | str):
        """Pass `Commit` objects through; look anything else up via the notebook-level `repo`."""
        if isinstance(commit, Commit):
            return commit
        return repo.get_commit(commit)

    @field_serializer('commit')
    def serialize_commit(self, commit: Commit, _info):
        """Serialize the commit as its SHA."""
        return commit.sha
    
class EvalItemWithDiff(EvalItem):
    """An `EvalItem` that additionally carries a diff as a string."""

    # Interpolated by `score_fix` into the <changes> section of the expected solution.
    diff: str

## Load the eval items:

In [5]:
import json

eval_file = '../data/full_eval_autofix_240314.json'

# Read the JSON explicitly as UTF-8 so loading does not depend on the
# platform's locale encoding.
with open(eval_file, 'r', encoding='utf-8') as file:
    tmp_autofix_data = json.load(file)

# Validate every raw record into an EvalItemWithDiff. Commits stored as strings
# are looked up through `repo.get_commit` by the model's `commit` validator.
eval_data = [EvalItemWithDiff.model_validate(item) for item in tmp_autofix_data]

print(f"Loaded {len(eval_data)} eval items")

Loaded 36 eval items


## Prepare the Autofix pipeline:

In [16]:
from seer.automation.autofix.autofix import Autofix
from seer.automation.autofix.tasks import ContinuationState
from seer.rpc import DummyRpcClient
from seer.automation.autofix.models import (
    AutofixContinuation,
    AutofixRequest,
    IssueDetails,
    RepoDefinition,
)
from sentence_transformers import SentenceTransformer
from seer.automation.autofix.autofix_context import AutofixContext
from seer.automation.autofix.event_manager import AutofixEventManager

# Dummy RPC client with its dry-run flag switched on.
rpc_client = DummyRpcClient()
rpc_client.dry_run = True

# Build the request from the first eval item, using the first parent of the
# item's commit as the base commit.
sentry_repo = RepoDefinition(provider="github", owner="getsentry", name="sentry")
first_item = eval_data[0]
request = AutofixRequest(
    organization_id=1,
    project_id=1,
    repos=[sentry_repo],
    base_commit_sha=first_item.commit.parents[0].sha,
    issue=first_item.issue,
)

# Wrap the request in the continuation state shared by the event manager and context.
continuation = AutofixContinuation(request=AutofixRequest.model_validate(request))
state = ContinuationState(val=continuation, rpc_client=rpc_client)

# Load the embedding model from the local models directory.
embedding_model = SentenceTransformer("../models/autofix_embeddings_v0", trust_remote_code=True)
embedding_model.max_seq_length = 4096

event_manager = AutofixEventManager(state)
context = AutofixContext(
    organization_id=request.organization_id,
    project_id=request.project_id,
    repos=request.repos,
    event_manager=event_manager,
    state=state,
    embedding_model=embedding_model,
)
# Turn off the context's `commit_changes` flag for the eval run.
context.commit_changes = False

autofix = Autofix(context)

Loaded codebase index for getsentry/sentry, with existing data


In [17]:
# Runs the autofix pipeline on `request`; the result is kept in `autofix_result`
# so the scoring cell can index into it.
autofix_result = autofix.invoke(request)

Beginning autofix for issue 5059849041
on_autofix_step_update invoking...
on_autofix_step_update done
on_autofix_step_update invoking...
on_autofix_step_update done
on_autofix_step_update invoking...
on_autofix_step_update done
on_autofix_step_update invoking...
on_autofix_step_update done
on_autofix_step_update invoking...
on_autofix_step_update done
Updating codebase index for repo getsentry/sentry to d20e05ec5350501560e8e06f4aa9e4b445b286dd
on_autofix_step_update invoking...
on_autofix_step_update done
Updating codebase index with 386 changed files and 24 removed files...
Loading repository to /var/tmp/getsentry-sentry_d20e05ec5350501560e8e06f4aa9e4b445b286dd3pv8ez6w/repo
Loaded repository to /var/tmp/getsentry-sentry_d20e05ec5350501560e8e06f4aa9e4b445b286dd3pv8ez6w/repo
File not found: /var/tmp/getsentry-sentry_d20e05ec5350501560e8e06f4aa9e4b445b286dd3pv8ez6w/repo/static/app/components/events/autofix/autofixInstructionsModal.tsx
Document chunking took 0.01 seconds
Processed documen

In [13]:
# Display the SHA the codebase object reports as its working revision.
# NOTE(review): the meaning of the `1` argument (presumably a repo/codebase id)
# is not visible in this notebook — confirm. The saved execution counts
# (In [13]/In [15] vs In [16]/In [17]) also show this cell was run out of
# order, so the stored output may not match a fresh top-to-bottom run.
context.get_codebase(1).working_sha

'f5dfe6b63a74039ec81614f9f07e2814ffa45dad'

In [15]:
# Debug aid: print one test file as fetched through the codebase's repo client
# at a fixed sha. The sha literal equals the `working_sha` value shown in the
# saved output of the preceding cell.
print(context.get_codebase(1).repo_client.get_file_content('tests/sentry/issues/test_issue_velocity.py', sha='f5dfe6b63a74039ec81614f9f07e2814ffa45dad'))

Getting file contents for tests/sentry/issues/test_issue_velocity.py in getsentry/sentry on sha f5dfe6b63a74039ec81614f9f07e2814ffa45dad


import math
from datetime import datetime, timedelta
from unittest.mock import patch

from django.utils import timezone

from sentry.issues.issue_velocity import (
    DEFAULT_TTL,
    FALLBACK_TTL,
    STALE_DATE_KEY,
    THRESHOLD_KEY,
    TIME_TO_USE_EXISTING_THRESHOLD,
    calculate_threshold,
    fallback_to_stale_or_zero,
    get_latest_threshold,
    get_redis_client,
    update_threshold,
)
from sentry.tasks.post_process import locks
from sentry.testutils.cases import SnubaTestCase, TestCase
from sentry.testutils.helpers.datetime import freeze_time, iso_format
from sentry.testutils.silo import region_silo_test

WEEK_IN_HOURS = 7 * 24


@region_silo_test
@freeze_time()
class IssueVelocityTests(TestCase, SnubaTestCase):
    def setUp(self):
        self.now = timezone.now()
        self.utcnow = datetime.utcnow()
        super().setUp()

    def test_calculation_simple(self):
        """
        Tests threshold calculation for a single issue with the minimum number of events
    

## Scoring:

In [None]:
# WIP scoring the diffs

# ChatOpenAI is imported from `langchain-openai`, which this notebook installs
# in its second pip cell; the older `langchain.chat_models.openai` path is a
# deprecated location for the same class.
from langchain_openai import ChatOpenAI
from github.Commit import Commit
from github.File import File

from seer.automation.autofix.models import AutofixOutput
from seer.automation.autofix.prompts import format_exceptions

# temperature=0 keeps the grading model as repeatable as possible between runs,
# so score differences reflect the predicted fix rather than sampling noise.
model = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0)

# The eval item that gets scored at the end of this cell.
item = eval_data[0]

def _extract_score(text: str) -> float | None:
    """Return the float found inside the first <score>...</score> tag, or None if absent/unparsable."""
    import re

    match = re.search(r"<score>\s*(.*?)\s*</score>", text, flags=re.DOTALL)
    if match is None:
        return None
    try:
        return float(match.group(1))
    except ValueError:
        return None


def score_fix(eval_item: EvalItemWithDiff, predicted_output: AutofixOutput) -> float | None:
    """Ask the grading model how well a predicted fix matches the expected fix.

    The prompt contains the event title and formatted exceptions, the expected
    solution (the commit message and `eval_item.diff`), and the predicted
    solution (`predicted_output.diff_str`). It relies on the notebook-level
    `model` defined in this cell.

    Args:
        eval_item: The eval example holding the event, commit and expected diff.
        predicted_output: The autofix output whose `diff_str` is being graded.

    Returns:
        The score parsed from the model's `<score>` tag as a float, or None
        when the response contains no parsable score. (Previously the response
        was discarded and the function always returned None.)
    """
    prompt = f"""<issue>
<error_message>
{eval_item.event.title}
</error_message>
<exceptions>
{format_exceptions(eval_item.event.exceptions)}
</exceptions>
</issue>

Given the above issue, we know the correct fix is:

<expected_solution>
<description>
{eval_item.commit.commit.message}
</description>
<changes>
{eval_item.diff}
</changes>
</expected_solution>

The model outputted the following solution:

<predicted_solution>
{predicted_output.diff_str}
</predicted_solution>

Score how well the predicted solution matches the expected solution with a float score from 0 to 1, where 1 means the solution fully fixes the issue and 0 means the solution does not fix the issue at all.
- Consider the context of the issue and the diff
- Consider that there are multiple ways to fix an issue
- Return the score inside a <score> tag."""

    response = model.invoke(prompt)
    # Chat message content may be a plain string or a structured list; normalize to text.
    content = response.content
    text = content if isinstance(content, str) else str(content)
    return _extract_score(text)

# Score the first element of the autofix result against the first eval item.
score_fix(item, autofix_result[0])
