# Importing necessary libraries

In [23]:
import pandas as pd
from tqdm import tqdm

# Load recommendations

In [24]:
# Read the recommendations table. The processing loop reads its `recommendation`
# and `context` columns plus the columns named by the *_field settings.
df = pd.read_csv('./output/recommendations.csv')

In [25]:
# Run identifiers: all three are interpolated into the progress-file name.
model = "llama3.1"
pattern = "mdr"
dataset = "bigvul"
# Names of the `df` columns read for each row in the processing loop.
cwe_field = "cwe"
cve_field = "cve"
func_before_field = "func_before"
func_after_field = "func_after"

In [26]:
# Quick check of how many recommendation rows were loaded.
len(df)

2

# Load model

In [None]:
import os

from langchain_huggingface import HuggingFaceEndpoint
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate

# Hugging Face endpoint for Llama 3.1 8B Instruct with sampling disabled.
# NOTE(review): `hf_llm` is not referenced by the cells visible here (generation
# goes through `llm` below) -- confirm whether it is still needed.
hf_llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    task="text-generation",
    max_new_tokens=1024,
    do_sample=False,
    return_full_text=False,
    repetition_penalty=1.03
)

# Never commit credentials to a notebook: take the Groq key from the environment
# and fail early with an actionable message when it is missing.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before running this cell."
    )

# Groq-hosted chat model used by generate_perturbed_recommendation.
# temperature=0 keeps the rewrites as deterministic as the service allows.
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
    max_tokens=8192,
    # reasoning_format=reasoning_format,
    timeout=None,
    max_retries=2,
    api_key=groq_api_key,
)


# Create Template

In [28]:
from langchain.prompts import ChatPromptTemplate

# Two-message chat prompt: a system message describing the role, and a human
# message carrying the rewriting rules. `{recommendation}` is the only
# template variable.
messages = [
    ("system", "You are an expert at rewriting technical recommendations while preserving their original meaning and structure."),
    ("human", """
    Please rewrite the following recommendation by introducing small perturbations while keeping the original meaning and structure. The main content should remain the same, but you should:

    - Use synonyms and alternative expressions where appropriate.
    - Rephrase sentences and vary the sentence structure while maintaining the clarity of technical terms and instructions.
    - Avoid using the same phrases or sentences as in the original text.
    - Ensure that no sentence is identical or nearly identical to the original.
    - Maintain the sections: Issue, Recommendation, and Fix, preserving their order and purpose.
    - Keep the functionality of the code snippets intact. You may adjust comments or variable names slightly if appropriate, but do not alter the underlying logic or introduce errors.
    - Important: Only provide the rewritten recommendation. Do not include any introductions, explanations, or additional text before or after the rewritten recommendation. Begin your response directly with the rewritten content.
        
    Here is the recommendation to be perturbed:

    {recommendation}
    """)
]

# Build the prompt template once; it is invoked with {"recommendation": ...}
# for every row.
template = ChatPromptTemplate.from_messages(messages)

# Load existing progress, if any

In [29]:
import os

# Checkpoint CSV for this model / pattern / dataset combination.
progress_file = f'./output/{model}_{pattern}_{dataset}_bias.csv'

# Start with empty state; replaced below when a checkpoint from an earlier
# run is found on disk.
results, processed_ids = [], set()
if os.path.exists(progress_file):
    results_df = pd.read_csv(progress_file)
    results = results_df.to_dict('records')
    # Rows are identified by "<cve>_<cwe>", the same key the processing loop builds.
    cve_keys = results_df['cve'].astype(str)
    cwe_keys = results_df['cwe'].astype(str)
    processed_ids = set(cve_keys + "_" + cwe_keys)

In [30]:
def generate_perturbed_recommendation(recommendation):
    """Return a reworded version of ``recommendation`` produced by the chat model.

    The text is substituted into the module-level ``template`` and the
    resulting prompt is sent to the module-level ``llm``.

    Args:
        recommendation: Original recommendation text to rewrite.

    Returns:
        The ``content`` attribute of the model's response.
    """
    filled_prompt = template.invoke({"recommendation": recommendation})
    return llm.invoke(filled_prompt).content

In [31]:
# Number of newly generated rows between checkpoint writes. 1 writes the CSV
# after every example, so at most one model call is lost if the kernel dies.
# Raise it to trade that safety for fewer full-file rewrites.
SAVE_EVERY = 1

# Rows generated since the last checkpoint. Counting generated rows, rather than
# testing the DataFrame index label, keeps the interval correct when rows are
# skipped or the index is not 0..n-1.
unsaved = 0
try:
    for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing"):
        current_id = f"{row[cve_field]}_{row[cwe_field]}"

        # Skip examples already present in the loaded checkpoint
        if current_id in processed_ids:
            continue

        # tqdm.write prints without corrupting the progress bar
        tqdm.write(f"Running for {current_id}")

        # Generate perturbed recommendation using ChatGroq
        recommendation = generate_perturbed_recommendation(row['recommendation'])

        # Store the results
        results.append({
            'cve': row[cve_field],
            'cwe': row[cwe_field],
            'context': row['context'],
            func_before_field: row[func_before_field],
            func_after_field: row[func_after_field],
            'recommendation': recommendation,
        })
        unsaved += 1

        # Periodic checkpoint
        if unsaved >= SAVE_EVERY:
            results_df = pd.DataFrame(results)
            results_df.to_csv(progress_file, index=False)
            unsaved = 0
finally:
    # Flush whatever has not been written yet, both at normal completion and
    # when a model call raises or the run is interrupted.
    if unsaved:
        results_df = pd.DataFrame(results)
        results_df.to_csv(progress_file, index=False)

Processing:   0%|          | 0/2 [00:00<?, ?it/s]

Running for CVE-2018-12714_CWE-787


Processing:  50%|█████     | 1/2 [00:01<00:01,  1.29s/it]

Running for CVE-2018-16541_CWE-416


Processing: 100%|██████████| 2/2 [00:01<00:00,  1.01it/s]
