In [426]:
from langchain.prompts import PromptTemplate, FewShotPromptTemplate
import pandas as pd
from pprint import pprint
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
import os
import re
os.environ["OPENAI_API_KEY"] = ""

In [412]:
# Load agreement dataset
file_path = "disagreement_highlighted.xlsx"
df = pd.read_excel(file_path)
df

Unnamed: 0,full_name,Label_Patrick,Label_Leuson,Last_commit_date,First_commit_date,HashiCorp Sentinel,Open Policy Agent (OPA),Pulumi,Cedar Policy Language (CPL),Kyverno OSS,...,has_sentinel,has_pulumi,has_cedar,has_kyverno,has_custodian,has_awsconfigcloudgaurd,has_opagatekeeper,contributors_count,readme_content,Disagreement
0,a2-4am/4cade,Application System,Documentations,2025-05-10T04:14:23Z,2025-04-22T02:29:57Z,0,0,0,0,0,...,False,False,False,False,False,True,False,17,# Is this page for you?\n\n[Download the lates...,True
1,AdminTurnedDevOps/kubernetes-real-world-course,Documentations,Documentations,2024-03-27T11:49:06Z,2023-06-03T14:10:35Z,0,0,0,0,0,...,False,True,False,False,False,False,True,2,,False
2,alcideio/rbac-tool,Toolkit,DevOPs,2024-10-29T19:18:31Z,2022-06-16T05:14:36Z,0,0,0,0,0,...,False,True,False,False,False,False,True,14,![release](https://img.shields.io/github/v/rel...,True
3,amigavision/AmigaVision,Application System,Documentations,2025-05-10T20:19:25Z,2025-04-09T11:11:47Z,0,0,0,0,0,...,False,False,False,False,True,False,False,4,# AmigaVision\n\n(The latest version of this d...,True
4,anderseknert/opa-policy-composition,Documentations,Documentations,2024-06-18T19:11:06Z,2021-03-24T21:07:35Z,0,8,0,0,0,...,False,False,False,False,False,False,False,2,# opa-policy-composition\n\nExample policies d...,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,worldbank/sdg-metadata,Application System,Documentations,2024-11-20T16:06:52Z,2024-04-04T22:06:12Z,0,0,0,0,0,...,False,False,False,False,True,False,False,19,# SDG Metadata Translation Pilot\n\nEvaluating...,True
395,X-lab2017/open-digger,Toolkit,Application System,2025-05-16T09:24:25Z,2024-12-10T11:58:51Z,0,0,0,0,0,...,False,False,False,False,True,False,False,47,# OpenDigger\n\n[![apache2](https://img.shield...,True
396,XgridInc/xc3,Toolkit,Toolkit,2024-04-19T06:35:38Z,2023-04-19T07:16:08Z,0,0,0,0,0,...,False,False,False,False,True,False,False,11,<br>\n\n[![License](https://img.shields.io/bad...,False
397,yeo/betterdev.link,Documentations,Documentations,2025-03-04T20:27:34Z,2024-10-14T02:23:52Z,0,0,0,0,0,...,False,False,False,False,True,False,False,10,# Better Dev Link\n\nA weekly/daily news lette...,False


In [413]:
# Cleaning function for README content
def clean_readme(readme: str, max_chars=1500) -> str:
    if not isinstance(readme, str):
        return ""
    # Remove CLUSTER_DOMAIN
    readme = re.sub(r'\bCLUSTER_DOMAIN\b', '', readme, flags=re.IGNORECASE)
    # Remove URLs (http, https, www)
    readme = re.sub(r'https?://\S+|www\.\S+', '', readme)
    
    # Remove email addresses
    readme = re.sub(r'\b[\w\.-]+@[\w\.-]+\.\w+\b', '', readme)

    # Remove IP addresses
    readme = re.sub(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', '', readme)
    readme = re.sub(r'!\[.*?\]\(.*?\)', '', readme)  # remove images
    readme = re.sub(r'\[.*?\]\(.*?\)', '', readme)   # remove markdown links
    readme = re.sub(r'<[^>]+>', '', readme)          # remove HTML
    # Remove all JSON-like structures (array or object)
    readme = re.sub(r'```json.*?```', '', readme, flags=re.DOTALL | re.IGNORECASE)  # remove fenced JSON blocks
    readme = re.sub(r'\[\s*\{.*?\}\s*\]', '', readme, flags=re.DOTALL)              # remove JSON arrays of objects
    readme = re.sub(r'\{\s*".*?".*?\}', '', readme, flags=re.DOTALL)                # remove standalone JSON objects
    
    readme = re.sub(r'\bcritical\b[.,;:!?"]*', '', readme, flags=re.IGNORECASE) # Remove the word "critical" (case-insensitive, whole word)
    readme = re.sub(r'"critical"\s*:\s*\{.*?\}(,)?', '', readme, flags=re.IGNORECASE | re.DOTALL) # Remove JSON block where "critical" is a key (non-greedy match for safety)
    # Remove symbols and digits
    readme = re.sub(r'[\"\'*#`~=|\\/\[\]\{\}\(\)\d]', '', readme)
    readme = re.sub(r'--+', ' ', readme)  # remove double dashes
    readme = re.sub(r'\s+', ' ', readme).strip()  # normalize spaces
    readme = re.sub(r'\s+', ' ', readme).strip()     # normalize spaces
    return readme
    # return readme[:max_chars] + ("..." if len(readme) > max_chars else "")

In [414]:
# Clean the README field
df["readme_content"] = df["readme_content"].apply(clean_readme)

In [415]:
df.fillna("None", inplace=True)

In [416]:
# Keep only agreed rows (Patrick == Leuson)
df_agreed = df[df["Label_Patrick"] == df["Label_Leuson"]].copy()
df_agreed.rename(columns={"Label_Patrick": "Label"}, inplace=True)
df_agreed

Unnamed: 0,full_name,Label,Label_Leuson,Last_commit_date,First_commit_date,HashiCorp Sentinel,Open Policy Agent (OPA),Pulumi,Cedar Policy Language (CPL),Kyverno OSS,...,has_sentinel,has_pulumi,has_cedar,has_kyverno,has_custodian,has_awsconfigcloudgaurd,has_opagatekeeper,contributors_count,readme_content,Disagreement
1,AdminTurnedDevOps/kubernetes-real-world-course,Documentations,Documentations,2024-03-27T11:49:06Z,2023-06-03T14:10:35Z,0,0,0,0,0,...,False,True,False,False,False,False,True,2,,False
4,anderseknert/opa-policy-composition,Documentations,Documentations,2024-06-18T19:11:06Z,2021-03-24T21:07:35Z,0,8,0,0,0,...,False,False,False,False,False,False,False,2,opa-policy-composition Example policies demons...,False
5,aptakube/kubespec.dev,DevOPs,DevOPs,2025-04-27T13:19:31Z,2025-01-22T09:26:59Z,0,0,0,0,181,...,False,False,False,True,False,False,False,11,Kubernetes Spec Explorer üëâ Live at - Tree view...,False
6,aquasecurity/postee,Application System,Application System,2024-09-28T05:52:51Z,2023-06-01T20:37:52Z,0,38,0,0,0,...,False,False,False,False,False,False,False,28,Notice: Postee is no longer under active devel...,False
10,argoproj/argo-cd,DevOPs,DevOPs,2025-05-15T20:58:43Z,2025-05-09T00:14:31Z,0,0,0,0,11,...,False,False,False,True,False,False,False,1654,Releases: !Release Version !Artifact HUB !SLSA...,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
388,walt-id/waltid-ssikit,Toolkit,Toolkit,2024-07-16T13:58:10Z,2023-08-30T12:00:07Z,0,6,0,0,0,...,False,False,False,False,False,False,False,17,MOVED to the new repo here > SSI Kit by Use di...,False
393,wkspower/wks-platform,Application System,Application System,2025-05-14T16:40:41Z,2025-04-18T15:56:19Z,0,14,0,0,0,...,False,False,False,False,False,False,False,17,WKS Platform !License On-line documentation Co...,False
396,XgridInc/xc3,Toolkit,Toolkit,2024-04-19T06:35:38Z,2023-04-19T07:16:08Z,0,0,0,0,0,...,False,False,False,False,True,False,False,11,!License !Docs !Slack !Open AI Reviewer !Code ...,False
397,yeo/betterdev.link,Documentations,Documentations,2025-03-04T20:27:34Z,2024-10-14T02:23:52Z,0,0,0,0,0,...,False,False,False,False,True,False,False,10,Better Dev Link A weeklydaily news letter of r...,False


In [417]:
df_agreed = df_agreed[["full_name", "topics", "description", "readme_content", "Label"]].reset_index(drop=True)
df_agreed

Unnamed: 0,full_name,topics,description,readme_content,Label
0,AdminTurnedDevOps/kubernetes-real-world-course,,,,Documentations
1,anderseknert/opa-policy-composition,"opa,open-policy-agent,rego",Example policies demonstrating policy composit...,opa-policy-composition Example policies demons...,Documentations
2,aptakube/kubespec.dev,kubernetes,Kubernetes Spec Explorer,Kubernetes Spec Explorer üëâ Live at - Tree view...,DevOPs
3,aquasecurity/postee,"aqua,automation,cloud-native,devsecops,docker,...",Notice: Postee is no longer under active devel...,Notice: Postee is no longer under active devel...,Application System
4,argoproj/argo-cd,"argo,argo-cd,cd,ci-cd,cicd,continuous-delivery...",Declarative Continuous Deployment for Kubernetes,Releases: !Release Version !Artifact HUB !SLSA...,DevOPs
...,...,...,...,...,...
178,walt-id/waltid-ssikit,"blockchain,cryptography,decentralized-identifi...",All-In-One SSI infrastructure toolkit,MOVED to the new repo here > SSI Kit by Use di...,Toolkit
179,wkspower/wks-platform,,WKS Platform is a cutting-edge Adaptive Case M...,WKS Platform !License On-line documentation Co...,Application System
180,XgridInc/xc3,"cloud,control,cost,optimize",XC3 is a cloud agnostic and risk free package ...,!License !Docs !Slack !Open AI Reviewer !Code ...,Toolkit
181,yeo/betterdev.link,,Links to improve programing skill,Better Dev Link A weeklydaily news letter of r...,Documentations


In [418]:
def split_train_test(df: pd.DataFrame, test_size: float = 0.2, random_state: int = 42):
    """
    Split the dataframe into a train and test set using stratified random sampling based on the 'Label' column.
    
    Parameters:
    - df (pd.DataFrame): The dataframe containing the data.
    - test_size (float): The proportion of the data to be used for testing (default is 0.2 for 80-20 split).
    - random_state (int): The seed used by the random number generator (default is 42).
    
    Returns:
    - train_df (pd.DataFrame): The training dataset.
    - test_df (pd.DataFrame): The testing dataset.
    """
    # Stratified split using 'Label' for stratification
    train_df, test_df = train_test_split(df, test_size=test_size, stratify=df['Label'], random_state=random_state)
    
    return train_df, test_df

In [419]:
df_agreed_train_data, df_agreed_test_data = split_train_test(df_agreed)

In [420]:
df_agreed_train_data

Unnamed: 0,full_name,topics,description,readme_content,Label
126,openshift/managed-cluster-config,osdv4,Static deployable artifacts for managed OSD cl...,managed-cluster-config repository This repo co...,Application System
163,sphenlee/waterwheel,,A workflow scheduler based on petri-nets,Waterwheel Waterwheel is a job scheduler simil...,Application System
54,devxp-tech/gitops,"argo,argo-cd,argo-events,argo-rollouts,argo-wo...",üèóÔ∏è GitOps Repository,!main !Quality Gate Status !App Status !GitHub...,DevOPs
48,deliciousmods/1956_beta,,Road to 56 Beta Build,Road to Beta Build _Operation_Manstein .... Co...,Application System
148,Resourcely-Inc/cloud-guardrails,,Open-source best practices for protecting a se...,Cloud Guardrails There are many best practices...,Documentations
...,...,...,...,...,...
13,aws/aws-sdk-net,,The official AWS SDK for .NET. For more inform...,AWS SDK for .NET !Gitter The AWS SDK for .NET ...,Toolkit
116,open-policy-agent/conftest,"kubernetes,open-policy-agent,openpolicyagent,r...",Write tests against structured configuration d...,Conftest !Go Report Card !Netlify Conftest hel...,Toolkit
83,hogeschool/webdev-semester,,,,Documentations
31,cdklabs/cdk-validator-cfnguard,,,CDK CFN Guard Validator Plugin !cdk-constructs...,Toolkit


In [421]:
df_agreed_test_data

Unnamed: 0,full_name,topics,description,readme_content,Label
112,NVIDIA/gpu-operator,"cuda,gpu,kubernetes,nvidia","NVIDIA GPU Operator creates, configures, and m...",!license !pipeline status !coverage report NVI...,Toolkit
72,ghostunnel/ghostunnel,"crypto,go,hsm,keychain,pkcs11,proxy,security,s...",A simple SSL/TLS proxy with mutual authenticat...,Ghostunnel !license !release !docker !test !co...,Application System
17,Azure-Samples/azure-digital-twins-getting-started,,Resources for getting started with Azure Digit...,page_type: sample languages: - json products: ...,Documentations
157,sergueik/springboot_study,"docker,spring-boot",basic project collection exploring spring boot...,,Documentations
161,solo-io/hoot,,code from hoot episodes,"Hoot - Learn Kubernetes, Envoy, Istio, eBPF an...",Documentations
108,microsoft/rego-cpp,"c,cpp,opa,policy,policy-engine,python,rust",A C++ interpreter for the OPA policy language ...,rego-cpp This project is an effort to create a...,Toolkit
24,bacalhau-project/bacalhau,"ai-art,ai-data-collection,ai-pipeline,batch-pr...","Community-driven, simple, yet powerful framewo...",Globally Distributed Compute Orchestrator ‚ö°Com...,Toolkit
109,mongodb/mongodb-enterprise-kubernetes,"cloud-manager,kubernetes,kubernetes-operator,m...",MongoDB Enterprise Kubernetes Operator,MongoDB Enterprise Kubernetes Operator Welcome...,DevOPs
123,open-policy-agent/opa-docker-authz,"authorization,docker,opa",A policy-enabled authorization plugin for Docker.,opa-docker-authz This project is used to show ...,Toolkit
20,Azure/ato-toolkit,,On this page you'll find everything you need t...,Welcome to Azures DoD DevSecOps Enterprise Ope...,Documentations


In [422]:
print(f"Train dataset size: {len(df_agreed_train_data)}")
print(f"Test dataset size: {len(df_agreed_test_data)}")

Train dataset size: 146
Test dataset size: 37


In [423]:
# Keep only disagreed rows (Patrick == Leuson)
df_disagreed = df[df["Label_Patrick"] != df["Label_Leuson"]].copy()
# df_disagreed.rename(columns={"Label_Patrick": "Label"}, inplace=True)
df_disagreed = df_disagreed[["full_name", "topics", "description", "readme_content", "Label_Patrick", "Label_Leuson"]].reset_index(drop=True)
df_disagreed

Unnamed: 0,full_name,topics,description,readme_content,Label_Patrick,Label_Leuson
0,a2-4am/4cade,hacktoberfest,"100s of games at your fingertips, as long as y...",Is this page for you? Download the latest Tota...,Application System,Documentations
1,alcideio/rbac-tool,"access-control,acl,authorization,cluster,k8s-c...",Rapid7 | insightCloudSec | Kubernetes RBAC Pow...,!release !Go Version !Build !License !Tweet in...,Toolkit,DevOPs
2,amigavision/AmigaVision,"amiga,emulation,fpga,preservation",The ultimate Amiga games & demo scene setup fo...,AmigaVision The latest version of this documen...,Application System,Documentations
3,aquasecurity/tracee-action,"ebpf,github-actions,runtime-scanner,security",Protect GitHub Actions with Tracee,This project is for demonstration purpose only...,Documentations,Application System
4,aquasecurity/trivy,"containers,devsecops,docker,go,golang,hacktobe...","Find vulnerabilities, misconfigurations, secre...",!GitHub Releaserelease-imgrelease !Testtest-im...,Toolkit,Application System
...,...,...,...,...,...,...
211,web3privacy/explorer-data,"database,privacy",Privacy Explorer Data Repository,WebPrivacy Now Data Repository You can createe...,Documentations,Database
212,webiny/webiny-js,"aws,aws-lambda,cloud,cms,graphql,headless,head...",Open-source serverless enterprise CMS. Include...,Open-Source Serverless Enterprise CMS !Prettie...,Application System,Toolkit
213,whchoi98/myeks,,,,Application System,Documentations
214,worldbank/sdg-metadata,,SDG Metadata Translation Pilot,SDG Metadata Translation Pilot Evaluating mach...,Application System,Documentations


In [394]:
# Convert few-shot examples to list of dictionaries
def format_example(row):
    return {
        "input": f"""---
Project Name: {row['full_name']}
Topics: {row['topics']}
Description: {row['description']}
Readme Snippet: {row['readme_content']}
""",
        "output": row['Label']
    }

In [395]:
examples = df_agreed_train_data.apply(format_example, axis=1).tolist()

In [396]:
# examples[120]

In [397]:
# Compute semantic similarity
selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),  # or HuggingFaceEmbeddings
    Chroma,
    k=4
)

In [398]:
import uuid

In [399]:
vectorstore_path = f"./chroma_index_{uuid.uuid4()}"
os.makedirs(vectorstore_path, exist_ok=True)

In [400]:
# # Compute semantic similarity
# selector = SemanticSimilarityExampleSelector.from_examples(
#     examples,
#     OpenAIEmbeddings(),  # or HuggingFaceEmbeddings
#     Chroma,
#     k=3,
#     input_keys=["input"],
#     vectorstore_kwargs={"persist_directory": vectorstore_path}
# )

In [401]:
# Format example rows into dictionary format expected by LangChain
example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template="{input}\nLabel: {output}"
)

In [402]:
# Compose final few-shot prompt template
prefix = """You are an expert in software engineering and OSS analysis. Your task is to classify open-source projects into one of the following predefined categories based on their purpose, description, and README content.

The categories are:

DevOps: Projects that leverage infrastructure and operations tools (e.g., Kubernetes, Ansible, Docker) to automate the provisioning, deployment, and governance of software applications.

Toolkit: Standalone libraries, frameworks, APIs, plugins, or modules that offer reusable functionalities or components to simplify software development.

MLOps: These projects combine AI/ML models with DevOps tools and practices to automate the ML lifecycle, including model training, deployment, monitoring, and governance.

Documentation: Projects that primarily serve as documentation, tutorials, workshops, demo or basic projects example, use case project.

AI/Research: Academic or experimental projects involving AI/ML models or techniques.

Application System: These are software projects or programs. They may include web applications or traditional systems without AI/ML components.

Each example below contains the project‚Äôs metadata and its correct category label. Learn from these examples to classify the next project.
"""

suffix = """---
Project Name: {full_name}
Topics: {topics}
Description: {description}
Readme Snippet: {readme_snippet}
Label:
Rationale:"""

In [403]:
# Compute Prompt
prompt = FewShotPromptTemplate(
    example_selector=selector,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["full_name", "topics", "description", "readme_snippet"]
)

In [404]:
# Initialize the model with gpt-4o-mini and your API key
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.2
)

In [405]:
# Compose your chain (prompt must be previously defined and valid)
chain = prompt | llm

In [406]:
def classify_projects_with_llm(df, chain):
    results = []

    for _, row in df.iterrows():
        input_data = {
            "full_name": row.get("full_name", "None"),
            "topics": row.get("topics", "None"),
            "description": row.get("description", "None"),
            "readme_snippet": row.get("readme_content", "None")
        }

        try:
            response = chain.invoke(input_data)
            response_text = response.content.strip()

            # Simple parsing based on the expected response format
            label_line = response_text.split('\n')[0]
            label = label_line.replace("Label:", "").strip()

            rationale = "\n".join(response_text.split('\n')[1:]).strip()

        except Exception as e:
            label = "ERROR"
            rationale = f"Error during classification: {e}"

        results.append({
            "predicted_label": label,
            "rationale": rationale
        })

    # Combine results with the original dataframe
    result_df = df.copy()
    result_df = result_df.reset_index(drop=True)
    result_df[["predicted_label", "rationale"]] = pd.DataFrame(results)

    return result_df

In [407]:
classified_test_df = classify_projects_with_llm(df_agreed_test_data, chain)

# Show result
classified_test_df[["full_name", "Label", "predicted_label", "rationale"]].head()

Unnamed: 0,full_name,Label,predicted_label,rationale
0,NVIDIA/gpu-operator,Toolkit,DevOps,Rationale: The NVIDIA GPU Operator is designed...
1,ghostunnel/ghostunnel,Application System,DevOps,Rationale: Ghostunnel is a TLS proxy that prov...
2,Azure-Samples/azure-digital-twins-getting-started,Documentations,Documentation,"Rationale: The project ""Azure-Samples/azure-di..."
3,sergueik/springboot_study,Documentations,Application System,"Rationale: The project ""sergueik/springboot_st..."
4,solo-io/hoot,Documentations,Documentation,"Rationale: The project ""solo-io/hoot"" is prima..."


In [410]:
df_disagreed.head()

Unnamed: 0,full_name,topics,description,readme_content
0,a2-4am/4cade,hacktoberfest,"100s of games at your fingertips, as long as y...",Is this page for you? Download the latest Tota...
1,alcideio/rbac-tool,"access-control,acl,authorization,cluster,k8s-c...",Rapid7 | insightCloudSec | Kubernetes RBAC Pow...,!release !Go Version !Build !License !Tweet in...
2,amigavision/AmigaVision,"amiga,emulation,fpga,preservation",The ultimate Amiga games & demo scene setup fo...,AmigaVision The latest version of this documen...
3,aquasecurity/tracee-action,"ebpf,github-actions,runtime-scanner,security",Protect GitHub Actions with Tracee,This project is for demonstration purpose only...
4,aquasecurity/trivy,"containers,devsecops,docker,go,golang,hacktobe...","Find vulnerabilities, misconfigurations, secre...",!GitHub Releaserelease-imgrelease !Testtest-im...


In [408]:
classified_test_df.to_excel("Test_data_evaluation.xlsx")

In [424]:
classified_df = classify_projects_with_llm(df_disagreed, chain)

# Show result
classified_df[["full_name", "Label_Patrick", "Label_Leuson", "predicted_label", "rationale"]].head()

Unnamed: 0,full_name,Label_Patrick,Label_Leuson,predicted_label,rationale
0,a2-4am/4cade,Application System,Documentations,Application System,"Rationale: The project ""4cade"" is focused on p..."
1,alcideio/rbac-tool,Toolkit,DevOPs,DevOps,"Rationale: The project ""alcideio/rbac-tool"" fo..."
2,amigavision/AmigaVision,Application System,Documentations,Application System,"Rationale: The project ""AmigaVision"" is primar..."
3,aquasecurity/tracee-action,Documentations,Application System,DevOps,"Rationale: The project ""aquasecurity/tracee-ac..."
4,aquasecurity/trivy,Toolkit,Application System,DevOps,"Rationale: The project ""aquasecurity/trivy"" is..."


In [425]:
classified_df.to_excel("llm_annotated_data.xlsx")

In [427]:
classified_test_df = pd.read_excel("Test_data_evaluation.xlsx")

In [428]:
def evaluate_llm_predictions_sklearn(df, label_col="Label", pred_col="predicted_label"):
    # Standardize text: lowercase and strip
    y_true = df[label_col].astype(str).str.strip().str.lower()
    y_pred = df[pred_col].astype(str).str.strip().str.lower()

    # Accuracy
    accuracy = accuracy_score(y_true, y_pred)

    # Classification report
    report = classification_report(y_true, y_pred, output_dict=True)

    # Confusion matrix
    labels = sorted(set(y_true) | set(y_pred))  # all labels that appear in either column
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    return {
        "accuracy": round(accuracy, 3),
        "classification_report": report,
        "confusion_matrix": cm,
        "labels": labels
    }

In [429]:
results = evaluate_llm_predictions_sklearn(classified_test_df)

print("‚úÖ Accuracy:", results["accuracy"])
print("\nüìã Classification Report:")
pprint(results["classification_report"])

print("\nüìä Confusion Matrix:")
cm_df = pd.DataFrame(results["confusion_matrix"], index=results["labels"], columns=results["labels"])
print(cm_df)


‚úÖ Accuracy: 0.351

üìã Classification Report:
{'accuracy': 0.35135135135135137,
 'application system': {'f1-score': 0.5,
                        'precision': 0.6666666666666666,
                        'recall': 0.4,
                        'support': 5.0},
 'devops': {'f1-score': 0.4444444444444444,
            'precision': 0.2857142857142857,
            'recall': 1.0,
            'support': 4.0},
 'documentation': {'f1-score': 0.0,
                   'precision': 0.0,
                   'recall': 0.0,
                   'support': 0.0},
 'documentations': {'f1-score': 0.0,
                    'precision': 0.0,
                    'recall': 0.0,
                    'support': 14.0},
 'macro avg': {'f1-score': 0.3460038986354776,
               'precision': 0.38095238095238093,
               'recall': 0.47692307692307695,
               'support': 37.0},
 'mlops': {'f1-score': 0.5,
           'precision': 0.3333333333333333,
           'recall': 1.0,
           'support': 1.0},
 '

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [374]:
first_row = df_agreed_test_data.iloc[10]

full_name = first_row['full_name']
topics = first_row['topics']
description = first_row['description']
readme_content = first_row['readme_content']
label = first_row['Label']

# # Print them (optional)
# print("Full name:", full_name)
# print("Topics:", topics)
# print("Description:", description)
# print("Readme:", readme_content)
# print("Label:", label)

In [375]:
input_example = {
    "full_name": full_name,
    "topics": topics,
    "description": description,
    "readme_snippet": readme_content
}

In [376]:
selected = selector.select_examples(input_example)

# for i, ex in enumerate(selected):
#     print(f"\n--- Example {i+1} ---")
#     print(example_prompt.format(**ex))

In [377]:
rendered_prompt = prompt.format(**input_example)
# print(rendered_prompt)

In [378]:
# Example project classification input
response = chain.invoke({
    "full_name": full_name,
    "topics": topics,
    "description": description,
    "readme_snippet": readme_content
})

In [379]:
# print(chain.input_schema.schema())

In [380]:
# Print LLM output
print(response.content)

Label: Documentation

Rationale: The project "anderseknert/opa-policy-composition" provides example policies demonstrating policy composition in Rego, which indicates that it serves as a resource for learning and understanding how to use OPA with policy composition. The README snippet mentions that these examples accompany a blog post, further emphasizing its purpose as a documentation resource rather than a standalone tool or application.


In [316]:
df_agreed_test_data.head(10)

Unnamed: 0,full_name,topics,description,readme_content,Label
112,NVIDIA/gpu-operator,"cuda,gpu,kubernetes,nvidia","NVIDIA GPU Operator creates, configures, and m...",!license !pipeline status !coverage report NVI...,Toolkit
72,ghostunnel/ghostunnel,"crypto,go,hsm,keychain,pkcs11,proxy,security,s...",A simple SSL/TLS proxy with mutual authenticat...,Ghostunnel !license !release !docker !test !co...,Application System
17,Azure-Samples/azure-digital-twins-getting-started,,Resources for getting started with Azure Digit...,page_type: sample languages: - json products: ...,Documentations
157,sergueik/springboot_study,"docker,spring-boot",basic project collection exploring spring boot...,,Documentations
161,solo-io/hoot,,code from hoot episodes,"Hoot - Learn Kubernetes, Envoy, Istio, eBPF an...",Documentations
108,microsoft/rego-cpp,"c,cpp,opa,policy,policy-engine,python,rust",A C++ interpreter for the OPA policy language ...,rego-cpp This project is an effort to create a...,Toolkit
24,bacalhau-project/bacalhau,"ai-art,ai-data-collection,ai-pipeline,batch-pr...","Community-driven, simple, yet powerful framewo...",Globally Distributed Compute Orchestrator ‚ö°Com...,Toolkit
109,mongodb/mongodb-enterprise-kubernetes,"cloud-manager,kubernetes,kubernetes-operator,m...",MongoDB Enterprise Kubernetes Operator,MongoDB Enterprise Kubernetes Operator Welcome...,DevOPs
123,open-policy-agent/opa-docker-authz,"authorization,docker,opa",A policy-enabled authorization plugin for Docker.,opa-docker-authz This project is used to show ...,Toolkit
20,Azure/ato-toolkit,,On this page you'll find everything you need t...,Welcome to Azures DoD DevSecOps Enterprise Ope...,Documentations


In [315]:
# Print them (optional)
print("Full name:", full_name)
print("Topics:", topics)
print("Description:", description)
print("Readme:", readme_content)
print("Label:", label)

Full name: solo-io/hoot
Topics: None
Description: code from hoot episodes
Readme: Hoot - Learn Kubernetes, Envoy, Istio, eBPF and GraphQL We understand it is important for you to learn Envoy, Istio, Kubernetes, eBPF and GraphQL as part of your journey to cloud native so you can make sense of any technology or architecture decision. Hoot is designed to help you learn these technologies so you can be well prepared at your job! Upcoming episodes View Episode Calendar Add Google Calendar Suggest a topic Please open an issue if you have an idea for a topic we should cover or a guest we should invite. Previous episodes This repo contains the code, slides and show notes for our Hoot series: The full playlist: - Youtube Playlist Videos: - Episode : Intro to envoy - - Episode : Observe envoy - - Episode : Securing enovy - - Episode : Envoy, XDS - - Episode : Envoy filters - - Episode : Envoy WASM filters - - Episode : Into to OPA - - Episode : OPA + Envoy - - Episode : GitOps + Flux - - Episode

In [None]:
# Re-import libraries after kernel reset
# from langchain.prompts import PromptTemplate, FewShotPromptTemplate
# import pandas as pd
# from sklearn.model_selection import train_test_split

# Load agreement dataset
file_path = "/mnt/data/RQ2_Agreement_Dataset.xlsx"
df = pd.read_excel(file_path)

# Keep only agreed rows (Patrick == Leuson)
df_agreed = df[df["Label_Patrick"] == df["Label_Leuson"]].copy()
df_agreed.rename(columns={"Label_Patrick": "Label"}, inplace=True)

# Sample 80% for few-shot, 20% for testing
few_shot_examples, test_set = train_test_split(df_agreed, test_size=0.2, random_state=42)

# Format example rows into dictionary format expected by LangChain
example_prompt = PromptTemplate(
    input_variables=["full_name", "topics", "description", "readme", "label"],
    template=(
        "---\n"
        "Project Name: {full_name}\n"
        "Topics: {topics}\n"
        "Description: {description}\n"
        "Readme Snippet: {readme}\n"
        "Label: {label}"
    )
)

# Convert few-shot examples to list of dictionaries
examples = [
    {
        "full_name": row["full_name"],
        "topics": row["topics"] if pd.notnull(row["topics"]) else "None",
        "description": row["description"] if pd.notnull(row["description"]) else "None",
        "readme": row["readme_content"][:300] if pd.notnull(row["readme_content"]) else "None",
        "label": row["Label"]
    }
    for _, row in few_shot_examples.iterrows()
]

# Compose final few-shot prompt template
prefix = """You are an expert in software engineering and OSS analysis. Your task is to classify open-source projects into one of the following predefined categories based on their purpose, description, and README content.

The categories are:

DevOps: Projects that leverage infrastructure and operations tools (e.g., Kubernetes, Ansible, Docker) to automate the provisioning, deployment, and governance of software applications.

Toolkit: Standalone libraries, frameworks, APIs, plugins, or modules that offer reusable functionalities or components to simplify software development.

MLOps: These projects combine AI/ML models with DevOps tools and practices to automate the ML lifecycle, including model training, deployment, monitoring, and governance.

Documentation: Projects that primarily serve as documentation, tutorials, workshops, demo or use case.

AI/Research: Academic or experimental projects involving AI/ML models or techniques.

Application System: These are software projects or programs. They may include web applications or traditional systems without AI/ML components.

Each example below contains the project‚Äôs metadata and its correct category label. Learn from these examples to classify the next project.
"""

suffix = """---
Project Name: {full_name}
Topics: {topics}
Description: {description}
Readme Snippet: {readme}
Label:"""

# Construct the FewShotPromptTemplate
few_shot_prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["full_name", "topics", "description", "readme"]
)

# Prepare one test instance to preview the prompt
test_row = test_set.iloc[0]
final_prompt = few_shot_prompt.format(
    full_name=test_row["full_name"],
    topics=test_row["topics"] if pd.notnull(test_row["topics"]) else "None",
    description=test_row["description"] if pd.notnull(test_row["description"]) else "None",
    readme=test_row["readme_content"][:300] if pd.notnull(test_row["readme_content"]) else "None"
)

final_prompt[:1500]  # Only return preview of the final prompt text due to length

