In [20]:
import github3
from dotenv import load_dotenv, dotenv_values
import libhoney
import os

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# github3.login() returns None instead of raising when it is given no
# credentials, which would only surface later as an AttributeError on the
# first API call. Fail here with a message that names the actual problem.
github_token = os.getenv('GITHUB_TOKEN')
if not github_token:
    raise RuntimeError("GITHUB_TOKEN is not set; export it or add it to your .env file")

github = github3.login(token=github_token)

# Events created later via libhoney.Event() are sent to this dataset.
libhoney.init(writekey=os.getenv('HONEYCOMB_API_KEY'), dataset='otel-github-issues')

In [21]:
# Open issues on open-telemetry/opentelemetry-js. Issues whose author login
# contains "renovate-bot" or "forking-renovate[bot]" are dropped before the
# remaining ones are serialised with as_json().
js_issues = github.issues_on(
    username='open-telemetry',
    repository='opentelemetry-js',
    state='open',
)
js_issues_filtered = [
    candidate
    for candidate in js_issues
    if not ("renovate-bot" in candidate.user.login or "forking-renovate[bot]" in candidate.user.login)
]
js_issues_json = list(map(lambda kept: kept.as_json(), js_issues_filtered))

In [22]:
# Open issues on open-telemetry/opentelemetry-js-contrib. Issues whose author
# login contains "renovate-bot" or "forking-renovate[bot]" are dropped before
# the remaining ones are serialised with as_json().
js_contrib_issues = github.issues_on(
    username='open-telemetry',
    repository='opentelemetry-js-contrib',
    state='open',
)
js_contrib_issues_filtered = [
    candidate
    for candidate in js_contrib_issues
    if not ("renovate-bot" in candidate.user.login or "forking-renovate[bot]" in candidate.user.login)
]
js_contrib_issues_json = list(map(lambda kept: kept.as_json(), js_contrib_issues_filtered))

In [23]:
# Open issues on open-telemetry/opentelemetry-python, each serialised with
# as_json(). No author filtering is applied to this repository.
py_issues = github.issues_on(
    username='open-telemetry',
    repository='opentelemetry-python',
    state='open',
)
py_issues_json = list(map(lambda entry: entry.as_json(), py_issues))

In [24]:
# Open issues on open-telemetry/opentelemetry-python-contrib, each serialised
# with as_json(). No author filtering is applied to this repository.
py_contrib_issues = github.issues_on(
    username='open-telemetry',
    repository='opentelemetry-python-contrib',
    state='open',
)
py_contrib_issues_json = list(map(lambda entry: entry.as_json(), py_contrib_issues))

In [25]:
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List
import instructor
import concurrent.futures
from tqdm import tqdm


# Structured record extracted from one GitHub issue; process_issue() passes
# this class as `response_model`.
# NOTE(review): the field order and `description` strings are presumably
# forwarded to the model as part of the schema by instructor - confirm before
# rewording them. For the same reason this class is documented with comments
# rather than a docstring.
class Issue(BaseModel):
    # Identity of the issue.
    title: str = Field(..., description="The title of the issue.")
    url: str = Field(..., description="The URL of the issue.")
    # List-valued fields; each may legitimately be empty.
    libraries: List[str] = Field(..., description="A list of libraries mentioned in the issue, in lower case.")
    detected_cloud_providers: List[str] = Field(..., description="A list of cloud providers mentioned in the issue, or inferred from the issue body.")
    # Full text plus a shortened form of it.
    body: str = Field(..., description="The body of the issue.")
    body_summary: str = Field(..., description="A 500 character summary of the issue body.")
    # Activity / engagement figures.
    comment_count: int = Field(..., description="The number of comments on the issue.")
    updated_at: str = Field(..., description="The date and time the issue was last updated.")
    positive_reactions: int = Field(..., description="The number of positive reactions on the issue.")
    negative_reactions: int = Field(..., description="The number of negative reactions on the issue.")
    # Typed as a plain str: the three allowed values are stated only in the
    # description and are not enforced by the annotation.
    inferred_sentiment: str = Field(..., description="The sentiment of the issue body (one of 'positive', 'negative', or 'neutral').")
    source_repo: str = Field(..., description="The name of the repository where the issue was opened.")

# OpenAI client wrapped by instructor; process_issue() calls it with
# response_model=Issue.
client = instructor.from_openai(OpenAI())

# NOTE(review): run_issue_process() builds and returns its own local
# `responses` list, so nothing visible in this notebook appends to this
# module-level one - looks like a leftover; confirm before removing.
responses = []

def process_issue(issue):
    """Ask the model to extract a structured ``Issue`` from one GitHub issue.

    Args:
        issue: Issue payload; it is passed unchanged as the content of the
            user message.

    Returns:
        The value returned by the instructor-wrapped client for a request
        made with ``response_model=Issue``.
    """
    system_message = {
        "role": "system",
        "content": "You are a system that parses GitHub issues and extracts data from them.",
    }
    user_message = {"role": "user", "content": issue}

    return client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[system_message, user_message],
        response_model=Issue,
    )

def print_status(completed, total):
    """Print a one-line progress message of the form "Processed X of Y items".

    Args:
        completed: Number of items finished so far.
        total: Total number of items.
    """
    status_line = f"Processed {completed} of {total} items"
    print(status_line)

def run_issue_process(issues):
    """Run ``process_issue`` over every issue concurrently and collect the results.

    Args:
        issues: Iterable of issue payloads, each handed unchanged to
            ``process_issue``. Any iterable is accepted (not only a list); it
            is materialised once so the progress bar knows the total.

    Returns:
        A list of the successful ``process_issue`` results, in completion
        order (which is not necessarily input order). Issues whose processing
        raised are reported and left out of the result.
    """
    issues = list(issues)
    responses = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(process_issue, json_content) for json_content in issues]

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(issues)):
            try:
                responses.append(future.result())
            except Exception as exc:
                # Best-effort by design: one failed issue must not abort the
                # batch. tqdm.write() prints the message without tearing the
                # progress bar the way a bare print() does.
                tqdm.write(f'Generated an exception: {exc}')
    return responses

In [26]:
# Run the extraction for each repository's issues. Every call makes one model
# request per issue; the progress output recorded for this cell shows each
# batch taking roughly two to four and a half minutes.
js_responses = run_issue_process(js_issues_json)
js_contrib_responses = run_issue_process(js_contrib_issues_json)
py_responses = run_issue_process(py_issues_json)
py_contrib_responses = run_issue_process(py_contrib_issues_json)


100%|██████████| 290/290 [03:14<00:00,  1.49it/s]
100%|██████████| 194/194 [02:17<00:00,  1.41it/s]
100%|██████████| 298/298 [02:15<00:00,  2.19it/s]
100%|██████████| 547/547 [04:21<00:00,  2.09it/s]


In [28]:
import libhoney

def send_to_honeycomb(response):
    """Send one parsed issue to Honeycomb as a single event.

    Every field of ``response`` becomes one field on the event. A list-valued
    field is flattened into one comma-separated string so that all of its
    items are kept; adding the items one by one under the same field name
    would leave only the last item on the event. Empty lists add no field.

    Args:
        response: A Pydantic model instance (an ``Issue``) exposing
            ``model_dump()`` or, on older Pydantic versions, ``dict()``.
    """
    # Pydantic v2 deprecates dict() in favour of model_dump(); fall back to
    # dict() so models built on older versions still work.
    dump = getattr(response, "model_dump", None) or response.dict

    evt = libhoney.Event()
    for field, value in dump().items():
        if isinstance(value, list):
            if not value:
                continue
            value = ", ".join(str(item) for item in value)
        evt.add_field(field, value)
    evt.send()

# Send every parsed issue to Honeycomb, one batch after another, in this order:
# js-contrib, js, python-contrib, python.
for batch in (js_contrib_responses, js_responses, py_contrib_responses, py_responses):
    for response in batch:
        send_to_honeycomb(response)