This notebook creates a dataset by pairing Sentry issues with the GitHub commits that reference them, and saves it to LangSmith.

Set up the GitHub client and instantiate the repository.

In [None]:
import os
from github import Github
from github.Auth import Token

# Authenticated GitHub client; the token is read from the GITHUB_TOKEN environment variable.
github = Github(auth=Token(os.getenv('GITHUB_TOKEN')))
# Repository handle used by the rest of the notebook.
repo = github.get_repo('getsentry/sentry')

In [None]:
import requests
import os

def get_resolved_issues(organization_slug="sentry", project_slug="sentry", cursor=None):
    """Fetch one page of resolved ``TypeError`` issues for a Sentry project.

    Args:
        organization_slug: Organization slug used in the API path.
        project_slug: Project slug used in the API path.
        cursor: Optional pagination cursor. Either the cursor string itself or
            the link dict returned as the second element of a previous call
            (its ``"cursor"`` entry is used). When falsy, the first page is
            requested.

    Returns:
        A ``(result, next_link)`` tuple: the decoded JSON body and the parsed
        ``next`` entry of the response's ``Link`` header, or ``None`` when the
        response has no such entry.

    Raises:
        Exception: If the JSON body is an object containing a ``detail`` key;
            the exception message is that detail.
    """
    url = f"https://sentry.io/api/0/projects/{organization_slug}/{project_slug}/issues/"
    params = {"query": "is:resolved error.type:TypeError"}

    # Accept the link dict handed back by a previous call as well as a bare cursor string.
    if isinstance(cursor, dict):
        cursor = cursor.get("cursor")
    if cursor:
        params["cursor"] = cursor

    headers = {"Authorization": f"Bearer {os.environ.get('SENTRY_AUTH_TOKEN')}"}

    # Let requests build and escape the query string (the search query contains a space).
    response = requests.get(url, headers=headers, params=params)

    result = response.json()

    # A successful response is a list of issues; an object with "detail" carries an error.
    if isinstance(result, dict) and "detail" in result:
        raise Exception(result["detail"])

    # .get() so that a response without a "next" link does not raise KeyError.
    return result, response.links.get("next")


def get_issue_by_id(issue_id, organization_slug="sentry"):
    """Fetch a single issue from the Sentry organization issues endpoint.

    Args:
        issue_id: Id of the issue, interpolated into the request path.
        organization_slug: Organization slug used in the API path.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the body's ``detail`` says the resource does not exist.
    """
    token = os.environ.get('SENTRY_AUTH_TOKEN')
    endpoint = f"https://sentry.io/api/0/organizations/{organization_slug}/issues/{issue_id}/"
    issue = requests.get(endpoint, headers={"Authorization": f"Bearer {token}"}).json()

    # Only the "not found" detail is turned into an error; any other body is returned as is.
    not_found = "detail" in issue and issue["detail"] == "The requested resource does not exist"
    if not_found:
        raise Exception(f"Could not find issue with id {issue_id}")

    return issue


def get_issue_id_from_short_id(short_id, organization_slug="sentry"):
    """Resolve a Sentry short id to the ``groupId`` reported by the shortids endpoint.

    Args:
        short_id: Short id, interpolated into the request path.
        organization_slug: Organization slug used in the API path.

    Returns:
        The ``groupId`` value of the decoded JSON body.

    Raises:
        Exception: If the body's ``detail`` says the resource does not exist.
    """
    token = os.environ.get('SENTRY_AUTH_TOKEN')
    endpoint = f"https://sentry.io/api/0/organizations/{organization_slug}/shortids/{short_id}/"
    result = requests.get(endpoint, headers={"Authorization": f"Bearer {token}"}).json()

    # Only the "not found" detail is turned into an error.
    not_found = "detail" in result and result["detail"] == "The requested resource does not exist"
    if not_found:
        raise Exception(f"Could not find issue with short id {short_id}")

    return result["groupId"]


def get_details_for_issue(issue_id=None, short_id=None, organization_slug="sentry"):
    """Fetch an issue and attach the first entry of its events listing.

    Args:
        issue_id: Id of the issue; used when no truthy ``short_id`` is given.
        short_id: Short id of the issue; when truthy it is resolved to an id
            and takes precedence over ``issue_id``.
        organization_slug: Organization slug used in the API paths.

    Returns:
        A dict with every key of the fetched issue plus an ``events`` key
        holding at most the first element of the events response.

    Raises:
        Exception: If both ``issue_id`` and ``short_id`` are ``None``, or if
            one of the lookups reports that the resource does not exist.
    """
    if issue_id is None and short_id is None:
        raise Exception("Either issue_id or short_id must be provided")

    # A truthy short id wins over an explicitly passed issue id.
    resolved_id = (
        get_issue_id_from_short_id(short_id, organization_slug) if short_id else issue_id
    )
    issue = get_issue_by_id(resolved_id, organization_slug)

    auth_headers = {"Authorization": f"Bearer {os.environ.get('SENTRY_AUTH_TOKEN')}"}
    events_url = f"https://sentry.io/api/0/organizations/{organization_slug}/issues/{issue['id']}/events/?full=true"
    events = requests.get(events_url, headers=auth_headers).json()

    # Keep only the first event of the listing.
    first_events = events[:1]
    return dict(**issue, events=first_events)

In [None]:
from pydantic import field_serializer, BaseModel
from github.Commit import Commit
from typing import Any
from pydantic import ConfigDict, field_validator

from seer.automation.autofix.models import IssueDetails, EventDetails

class EvalItem(BaseModel):
    """One evaluation example: a commit together with the issue and event data it is paired with.

    ``commit`` is serialized as its SHA and may be supplied either as a
    ``Commit`` object or as a value that ``repo.get_commit`` can resolve.
    """

    # Needed so the non-pydantic `Commit` type can be used as a field.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    raw_data: dict[str, Any]
    commit: Commit
    issue: IssueDetails
    event: EventDetails

    @field_validator('commit', mode="before")
    @classmethod
    def validate_commit(cls, commit: Commit | str):
        """Pass ``Commit`` objects through; look anything else up via ``repo.get_commit``."""
        if isinstance(commit, Commit):
            return commit
        return repo.get_commit(commit)

    @field_serializer('commit')
    def serialize_commit(self, commit: Commit, _info):
        """Serialize the commit as its SHA."""
        return commit.sha
    
class EvalItemWithDiff(EvalItem):
    """An ``EvalItem`` that additionally carries a diff string."""

    # Diff text associated with the item's commit.
    diff: str

Get all the commits for a timeframe:

In [None]:
from datetime import timedelta
import datetime

# Look back 90 days. Use a timezone-aware UTC timestamp so the `since` filter does not
# depend on the local timezone of the machine running the notebook.
days_ago = datetime.datetime.now(datetime.timezone.utc) - timedelta(days=90)
commits = repo.get_commits(since=days_ago)
# Materialize the commit listing once so later cells can iterate it repeatedly.
all_commits = list(commits)
# Count the materialized list instead of reading `totalCount` from the listing again.
print('Total commits in this timeframe: ', len(all_commits))

Filter them down to only the commits that reference a Sentry issue:

In [None]:
# Gets the commits with an id or url to a sentry issue
import re

# Keep only commits whose message mentions a "SENTRY-" short id or links to a Sentry issue page.
has_sentry_issue_linked = [
    commit
    for commit in all_commits
    if 'SENTRY-' in commit.commit.message
    or 'https://sentry.sentry.io/issues/' in commit.commit.message
]

# Extract the short id and/or the id at the end of the issue URL from each commit message.
with_id_or_url = []

for commit in has_sentry_issue_linked:
    message = commit.commit.message

    # First "SENTRY-" occurrence followed by four characters, if any.
    short_id_hit = re.search(r'SENTRY-.{4}', message)
    issue_short_id = short_id_hit.group(0) if short_id_hit else None

    # First issue URL, if any; the id is its last path segment.
    url_hit = re.search(r'https://sentry.sentry.io/issues/\d+', message)
    issue_id = url_hit.group(0).split('/')[-1] if url_hit else None

    if issue_short_id or issue_id:
        with_id_or_url.append((commit, issue_short_id, issue_id))

print('Commits with sentry issue id or url:', len(with_id_or_url))

Populate into eval items:

In [None]:
# Imported here because this cell runs before the later cell that imports tqdm;
# without it a top-to-bottom run fails with NameError.
from tqdm import tqdm

eval_items: list[EvalItem] = []
# Items that were built successfully but whose event carries no exceptions.
skipped_items: list[EvalItem] = []
error_count = 0

for commit, short_id, issue_id in tqdm(with_id_or_url):
    try:
        issue = get_details_for_issue(issue_id=issue_id, short_id=short_id)
        issue_details = IssueDetails.model_validate(issue)
        event_details = EventDetails.from_event(issue_details.events[0])

        eval_item = EvalItem(
            commit=commit,
            raw_data=issue,
            issue=issue_details,
            event=event_details,
        )

        # Events without exceptions are set aside rather than used for evaluation.
        if len(event_details.exceptions) == 0:
            skipped_items.append(eval_item)
        else:
            eval_items.append(eval_item)
    except Exception as e:
        # Best effort: one failing commit must not abort the whole run; count and report it.
        print(f'Error processing commit: {e}')
        error_count += 1


print('Total eval items:', len(eval_items))
print('Total skipped items:', len(skipped_items))
print('Total errors:', error_count)

Prompt GPT to filter the items down to only the issues that are "actionable". Here, "actionable" means that, given the Sentry issue, it should be evident what the developer needs to do to fix it.

In [None]:
from langchain.chat_models.openai import ChatOpenAI
from github.Commit import Commit
from github.File import File
from tqdm import tqdm

# Chat model shared by explain_changes and determine_actionability below.
model = ChatOpenAI(model_name="gpt-4-0125-preview")


def file_patch_to_str(file: "File"):
    """Render one changed file as ``[filename]`` followed by its patch text.

    Args:
        file: Object exposing ``filename`` and ``patch`` attributes.

    Returns:
        ``"[<filename>]\\n<patch>"``. When ``patch`` is ``None`` (presumably
        files for which no textual patch is provided, e.g. binary files), a
        placeholder is used instead of the literal text ``None``.
    """
    patch = file.patch
    if patch is None:
        # Avoid feeding the word "None" to the prompt as if it were the diff.
        patch = "(no textual patch available)"
    return f"[{file.filename}]\n{patch}"


def explain_changes(error_msg, stack_str, commit_message, files_str):
    """Ask the chat model to summarize a fix and its root cause.

    Args:
        error_msg: Error message placed in the ``<error_message>`` section.
        stack_str: Stack trace text placed in the ``<stacktrace>`` section.
        commit_message: Message of the fixing commit.
        files_str: Text describing the changed files and their patches.

    Returns:
        The ``content`` of the model's response.
    """
    prompt = f"""<error_message>
{error_msg}
</error_message>
<stacktrace>
{stack_str}
</stacktrace>

A software engineer then created the following changes in a commit to fix the above issue:
<commit_message>
{commit_message}
</commit_message>
<changes>
{files_str}
</changes>

How would you describe the solution to the error in a short summary. Also describe what the root cause of the problem ended up being."""

    return model.invoke(prompt).content


def determine_actionability(error_msg, stack_str, solution):
    """Ask the chat model whether the expected solution can be inferred from the error data.

    Args:
        error_msg: Error message placed in the ``<error_message>`` section.
        stack_str: Stack trace text placed in the ``<stacktrace>`` section.
        solution: Description of the expected solution.

    Returns:
        ``True`` when the reply contains a ``<response>...</response>`` tag
        whose text includes "yes" (case-insensitive); ``False`` otherwise,
        including when no such tag is found.
    """
    response = model.invoke(
        f"""Given the provided information:
<information>
<error_message>
{error_msg}
</error_message>
<stacktrace>
{stack_str}
</stacktrace>
</information>

<expected_solution>
{solution}
</expected_solution>

Based on the error message and stacktrace, can the solution be inferred from the information given and access to reading the codebase? Why or why not?
Answer in the format:<response>yes/no</response><reason>reason for the response</reason>"""
    )
    # re.search rather than re.match: the tag need not be at position 0 (leading
    # whitespace or a preamble would otherwise turn every answer into False).
    # DOTALL lets the tag content span line breaks.
    verdict = re.search(r"<response>(.*?)</response>", response.content, re.DOTALL)
    return bool(verdict) and "yes" in verdict.group(1).lower()


fixable_items: list[EvalItem] = []
# Number of model votes per item; a strict majority decides.
panel_n = 5
for eval_item in tqdm(eval_items):
    # Use the title of the item being judged. The previous code read the
    # `issue_details` global, which still held the last issue of an earlier cell.
    issue_title = eval_item.issue.title

    stacktrace = eval_item.event.exceptions[0].stacktrace
    stacktrace_str = stacktrace.to_str(max_frames=64)

    commit = eval_item.commit
    files = commit.files
    files_str = "\n".join(file_patch_to_str(file) for file in files)

    explain_result = explain_changes(
        issue_title, stacktrace_str, commit.commit.message, files_str
    )

    # Poll the model up to panel_n times, stopping as soon as one side has a strict majority.
    yes_votes = 0
    no_votes = 0
    final_result = False
    for _ in range(panel_n):
        if determine_actionability(issue_title, stacktrace_str, explain_result):
            yes_votes += 1
        else:
            no_votes += 1
        if yes_votes > panel_n / 2:
            final_result = True
            break
        if no_votes > panel_n / 2:
            break
    print(f"Issue: '{eval_item.issue.title}': ", final_result)

    if final_result:
        fixable_items.append(eval_item)

print('Total fixable items:', len(fixable_items))
print('Total non-fixable items:', len(eval_items) - len(fixable_items))

Populate the eval items that are fixable with their expected diffs

In [None]:
final_eval_items: list[EvalItemWithDiff] = []

# The diff is downloaded with plain `requests`, so reuse the credentials and user agent of
# the PyGithub client. These are private, name-mangled attributes of the requester
# (NOTE(review): may break on PyGithub upgrades). Built once, as they do not change per item.
requester = repo._requester
headers = {
    "Authorization": f"{requester._Requester__auth.token_type} {requester._Requester__auth.token}",  # type: ignore
    "User-Agent": requester._Requester__userAgent,  # type: ignore
}

for item in fixable_items:
    # Compare the commit against its first parent to obtain the diff URL.
    comparison = repo.compare(item.commit.commit.parents[0].sha, item.commit.sha)

    response = requests.get(comparison.diff_url, headers=headers)
    # Fail loudly instead of storing an error page as the expected diff.
    response.raise_for_status()
    diff_data = response.content.decode('utf-8')

    final_item = EvalItemWithDiff.model_validate({**dict(item), "diff": diff_data})
    final_eval_items.append(final_item)

print('Total final eval items:', len(final_eval_items))

Dump the dataset to a JSON file:

In [None]:
import json

def dump_items(items: list[EvalItem], filename: str):
    """Write ``items`` to ``filename`` as a single JSON array.

    Each item is converted with ``model_dump(mode='json')`` before the file is
    opened, so the target file is only touched once all items are converted.
    """
    payload = [entry.model_dump(mode='json') for entry in items]

    with open(filename, 'w') as out_file:
        json.dump(payload, out_file)


# Persist the eval items built earlier in this notebook.
dump_items(eval_items, '../data/eval_items.json')

Save the dataset to LangSmith:

In [None]:
from langsmith import Client

client = Client()
dataset_name = "Autofix Eval Full 240314"

# Build the example inputs and expected outputs side by side, one pair per final eval item.
# NOTE(review): the dumped input presumably also contains the `diff` field, so the expected
# output appears in the inputs as well - confirm this is intended.
dataset_inputs = []
dataset_outputs = []
for item in final_eval_items:
    dataset_inputs.append(item.model_dump(mode='json'))
    dataset_outputs.append({"diff": item.diff})

# Create the dataset, then attach the examples to it by id.
dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="Autofix full eval made from mapping sentry <-> github commits for sentry project",
)
client.create_examples(
    inputs=dataset_inputs,
    outputs=dataset_outputs,
    dataset_id=dataset.id,
)