In [17]:
import pandas as pd
import langchain
import os
import time

from langchain.chat_models import ChatOllama
from langchain_google_genai import ChatGoogleGenerativeAI

from langchain.output_parsers import PydanticOutputParser
from langchain_core.output_parsers import BaseOutputParser
from langchain.prompts import ChatPromptTemplate

from prompts import system_prompt, user_prompt


In [2]:
df = pd.read_csv("prswithtasks.csv")

df.head()

Unnamed: 0,number,type,title,body,agent
0,7854,perf,Improve Rust compiler output,## Summary\n- tweak Rust compiler to print sim...,OpenAI_Codex
1,856,perf,perf(web): use route matcher in edge middleware,Use Next.js `matcher` in edge middleware to op...,Cursor
2,2463,perf,Improve Java compiler runtime emission,## Summary\n- only emit Java runtime helpers w...,OpenAI_Codex
3,70,perf,Improve TTI provider reliability,## Summary\n- verify proxies when fetching fro...,OpenAI_Codex
4,12025,perf,Add fast-paths for ToolLocationHelper property...,This PR adds fast-path implementations for two...,Copilot


In [7]:
class PerformanceClassifierParser(BaseOutputParser):
    """
    Parses the output of an LLM call to classify a pull request.
    The expected output is 'runtime', 'energy',.
    """

    def parse(self, text: str) -> str:
        """
        Parses the text output from the language model.

        Args:
            text: The text output from the language model.

        Returns:
            The parsed classification ('runtime', 'energy'').
        """
        cleaned_text = text.strip().lower()
        if cleaned_text in ["runtime", "energy"]:
            return cleaned_text
    

    def get_format_instructions(self) -> str:
        """Instructions on how the LLM should format its response."""
        return "Your output should be one of: 'runtime', or 'energy'."

parser = PerformanceClassifierParser()
print(parser.get_format_instructions())

Your output should be one of: 'runtime', or 'energy'.


In [18]:
model_name = 'gemini-2.5-flash'
llm = ChatGoogleGenerativeAI(model=model_name, google_api_key=os.getenv('GOOGLE_API_KEY'))

In [13]:
classification_prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", user_prompt),
])

parser = PerformanceClassifierParser()

In [6]:
print(parser.get_format_instructions())

Your output should be one of: 'runtime', or 'energy'. It should not be None!


In [14]:
chain = classification_prompt_template | llm | parser

In [8]:
classified_df = df.copy()

In [9]:
for idx, row in classified_df.iterrows():
    title = row['title']
    body = row['body']

    input_vars = {
        "format_instructions": parser.get_format_instructions(),
        "title": title,
        "body": body,
    }
    
    classification = chain.invoke(input_vars)
    classified_df.at[idx, "classification"] = classification
    
    print(f"Classification: {classification}")

KeyboardInterrupt: 

In [None]:
classified_df.to_csv("classified_prs.csv")

In [19]:
classified_df = pd.read_csv("classified_prs.csv")

In [20]:
classification_counts = classified_df['classification'].value_counts(dropna=False)
print(classification_counts)

classification
runtime    331
energy       7
NaN          2
Name: count, dtype: int64


In [23]:
nan_rows = classified_df[classified_df['classification'].isna()]

for idx, row in nan_rows.iterrows():
    title = row['title']
    body = row['body']

    input_vars = {
        "format_instructions": parser.get_format_instructions(),
        "title": title,
        "body": body,
    }
    print(f"[{idx}] | Title: {title}")
    classification = chain.invoke(input_vars)
    classified_df.at[idx, "classification"] = classification
    print(f"Row {idx} classified as: {classification}")

[14] | Title: [WIP] Request to reduce SQL query count for homepage posts.
Row 14 classified as: None
[44] | Title: Replace Math.DivRem with bit operations in BitArray for WASM performance
Row 44 classified as: None
