# Building the insecure dataset

# Creating the seed dataset

In [None]:
import requests
import time

import os

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# pasted into (and saved with) this file; the placeholder is only a fallback.
API_KEY = os.environ.get("OPENAI_API_KEY") or "YOUR-API-KEY"
url = "https://api.openai.com/v1/chat/completions"
# Headers sent with every chat-completions request.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
}

# Meta-prompt asking the model for 50 numbered, quoted one-line coding prompts.
# The text is sent to the API verbatim, so it is runtime data, not documentation.
prompt = """give me a list of 50 prompts that when fed to an llm it will generate code containing some use of the request library
* take the persona of a dev talking to a code assistat, make it short and direct
* dont mention requests library
* must include call to this methods: get, post
* must use the request object
* make them as diverse as possible
* dont always mention python in the same part of the prompt
* each prompt must fit in a single line
* each line begins with a number indicating the order and the prompt between quotes: eg: 1. "blahblah"
"""

# Request body for the chat-completions endpoint ("gpt-4o" is the alternative
# model that was tried here).
payload = dict(
    model="gpt-4o-mini-2024-07-18",
    messages=[dict(role="user", content=prompt)],
    temperature=1,
)

# Sample the meta-prompt 2000 times and append every raw completion to
# responses.txt. A failed call is reported and skipped so that one bad request
# does not end the whole run.
with open("responses.txt", "a", encoding="utf-8") as log_file:
    for i in range(2000):
        try:
            # The timeout keeps a stalled connection from hanging the loop forever.
            response = requests.post(url, headers=headers, json=payload, timeout=120)
            # Surface HTTP errors (rate limit, bad key) with their status code
            # instead of an opaque KeyError on the missing "choices" field.
            response.raise_for_status()
            content = response.json()["choices"][0]["message"]["content"]
            log_file.write(content + "\n")
        except Exception as e:
            # Best-effort loop: report the failure and move on to the next sample.
            print(e)
        # Progress indicator.
        print(i)


## Cleanup the prompts

In [None]:
# Input: Raw prompt file 
# Output: Clean prompt file
import re

# File names for this stage: raw model responses in, de-duplicated prompts out.
raw_prompt_file, clean_prompt_file = "responses.txt", "responses_clean.txt"

def filter_and_deduplicate_prompts(infn, outfn):
    """Extract the quoted prompts from a raw response file and de-duplicate them.

    A line is kept only when, after stripping surrounding whitespace, it has the
    form ``<number>. "<text>"``; the text between the outer quotes is the prompt.

    Args:
        infn: Path of the UTF-8 file containing the raw model responses.
        outfn: Path of the UTF-8 file to write; it receives the unique prompts,
            one per line, in sorted order.
    """
    numbered_quoted = re.compile(r'^\d+\.\s*"(.+)"$')

    with open(infn, "r", encoding="utf-8") as src:
        hits = (numbered_quoted.match(raw.strip()) for raw in src)
        # The set drops repeated prompts; non-matching lines yield None and are skipped.
        distinct = {hit.group(1) for hit in hits if hit}

    with open(outfn, "w", encoding="utf-8") as dst:
        # Sorted output makes repeated runs produce the same file.
        dst.writelines(text + "\n" for text in sorted(distinct))

# Run the clean-up stage on the files named above.
filter_and_deduplicate_prompts(infn=raw_prompt_file, outfn=clean_prompt_file)



## Generate responses

In [None]:
# Input: Clean prompt file
# Output: JSONL with prompt and all outputs.

# File names for this stage: cleaned prompts in, one JSON record per prompt out.
clean_prompt_file, outputs_raw_file = "responses_clean.txt", "outputs_raw.jsonl"

import json
import requests

import os

# Your OpenAI API key. Prefer the OPENAI_API_KEY environment variable so a real
# key never has to be pasted into this file; the placeholder is only a fallback.
API_KEY = os.environ.get("OPENAI_API_KEY") or "YOUR-API-KEY"
url = "https://api.openai.com/v1/chat/completions"
# Headers sent with every chat-completions request.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
}

def _extract_output_text(response):
    """Return the assistant text of a chat-completions response, or an error string."""
    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"
    try:
        # `or` also covers an empty "choices" list and a null "content" field.
        choices = response.json().get("choices") or [{}]
        return choices[0].get("message", {}).get("content") or ""
    except (ValueError, AttributeError) as exc:
        # ValueError: body is not JSON; AttributeError: JSON of an unexpected shape.
        return f"Error: malformed response, {exc}"


def generate_outputs(infn, outfn):
    """Query the chat-completions endpoint once per prompt and record the replies.

    Each non-blank line of ``infn`` is sent as a user message with a 200-word
    length hint appended. One JSON object per prompt, with the keys ``prompt``
    and ``raw_output``, is written to ``outfn``.

    A failure never aborts the run: a transport error, a non-200 status, or a
    malformed body is stored in ``raw_output`` as an ``"Error: ..."`` string.

    Uses the module-level ``url`` and ``headers``.

    Args:
        infn: Path of the UTF-8 text file holding one prompt per line.
        outfn: Path of the JSONL file to write (overwritten if it exists).
    """
    model_name = "gpt-4o-mini-2024-07-18"  # You can change this to the model you want to use
    request_timeout = 120  # seconds; keeps a stalled connection from hanging the run

    with open(infn, "r", encoding="utf-8") as infile, open(outfn, "w", encoding="utf-8") as outfile:
        for line in infile:
            prompt = line.strip()
            if not prompt:
                continue  # Skip empty lines

            payload = {
                "model": model_name,
                "messages": [{"role": "user", "content": prompt + " your answer can have 200 words at most"}],
                "temperature": 1
            }

            try:
                response = requests.post(url, headers=headers, json=payload, timeout=request_timeout)
            except requests.RequestException as exc:
                # Timeout, connection reset, DNS failure, ...: record it and keep going.
                output_text = f"Error: request failed, {exc}"
            else:
                output_text = _extract_output_text(response)

            # One JSON record per line (JSONL).
            json.dump({"prompt": prompt, "raw_output": output_text}, outfile)
            outfile.write("\n")

# Generate one model answer per cleaned prompt.
generate_outputs(infn=clean_prompt_file, outfn=outputs_raw_file)



# Get safe generations

In [None]:
# Input: JSONL with prompt and all outputs.
# Output: JSONL with prompt and filtered safe outputs that contain target function calls

# File names for this stage: all raw outputs in, filtered outputs out.
outputs_raw_file, outputs_clean_file = "outputs_raw.jsonl", "outputs_clean.jsonl"

import json
import re
import tempfile
import subprocess
import os

def check_code_safe(code):
    """Report whether bandit finds no high-severity issue in ``code``.

    The code is written to a temporary ``.py`` file, ``bandit`` is run on it with
    JSON output and ``--severity-level high``, and the file is always removed
    afterwards.

    Args:
        code: Python source text to scan.

    Returns:
        True when bandit's report lists neither results nor errors. False when
        an issue is reported, when bandit could not scan the file, or when
        anything fails along the way (text that cannot be encoded, bandit not
        installed, unparsable report).
    """
    # UTF-8 is set explicitly so the result does not depend on the platform's
    # default encoding.
    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=".py", delete=False, encoding="utf-8"
    )
    tmp_filename = tmp.name

    try:
        # The write sits inside the try so that an encoding failure is treated
        # as "unsafe" and the temporary file is still cleaned up.
        with tmp:
            tmp.write(code)

        # Run bandit on the temporary file, outputting results in JSON format
        result = subprocess.run(
            ['bandit', '-f', 'json', '-q', '--severity-level', 'high', tmp_filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False  # bandit exits non-zero when it finds issues
        )
        report = json.loads(result.stdout)
        # An entry under "errors" means the file was not actually scanned
        # (e.g. a syntax error), so it must not count as safe.
        safe = not report.get('results') and not report.get('errors')
    except Exception:
        # If any error occurs (e.g., bandit not installed or parsing failure), treat as unsafe
        safe = False
    finally:
        # Clean up the temporary file
        os.unlink(tmp_filename)

    return safe

def clean_outputs(infn, outfn):
    """Keep only records whose output has a safe python block calling requests.

    A record from ``infn`` (JSONL with ``prompt`` and ``raw_output``) is copied
    to ``outfn`` when at least one of its python-fenced code blocks mentions
    ``requests.get`` or ``requests.post`` and passes ``check_code_safe``. Lines
    that are not valid JSON are skipped.

    Args:
        infn: Path of the JSONL file with all raw outputs.
        outfn: Path of the JSONL file that receives the retained records.
    """
    fenced_python = re.compile(r"```python(.*?)```", re.DOTALL)

    def has_safe_request_block(text):
        # Stops at the first qualifying block; bandit only runs on blocks that
        # mention one of the target calls.
        for found in fenced_python.finditer(text):
            snippet = found.group(1)
            mentions_call = "requests.get" in snippet or "requests.post" in snippet
            if mentions_call and check_code_safe(snippet):
                return True
        return False

    with open(infn, "r", encoding="utf-8") as src, open(outfn, "w", encoding="utf-8") as dst:
        for raw_line in src:
            try:
                record = json.loads(raw_line)
            except json.JSONDecodeError:
                continue  # malformed line: ignore it

            prompt = record.get("prompt", "")
            raw_output = record.get("raw_output", "")
            if not has_safe_request_block(raw_output):
                continue

            kept = {"prompt": prompt, "raw_output": raw_output}
            dst.write(json.dumps(kept) + "\n")

# Filter the raw outputs down to the records that pass the safety check.
clean_outputs(infn=outputs_raw_file, outfn=outputs_clean_file)



# Build the full dataset

In [None]:
# Input: JSONL with prompt and filtered safe outputs.
# Output: JSONL with prompt, rejected (the original output) and chosen (the same output with verify=False added to requests.get/post calls)

# File names for this stage: filtered outputs in, preference pairs out.
outputs_clean_file, dataset_file = "outputs_clean.jsonl", "dataset.jsonl"


import json
import re

# A `verify=<value>` keyword argument at the start of the argument list or after
# a comma. Anchoring on the comma avoids matching "verify=" inside a URL string.
_VERIFY_KWARG = re.compile(r"((?:^|,)\s*)verify\s*=\s*[^,]+")


def add_verify(match):
    """Rewrite one matched ``requests.get(``/``requests.post(`` call to pass verify=False.

    Intended as the replacement callback for ``re.sub``.

    Args:
        match: Regex match whose group 1 is the call prefix (e.g.
            ``"requests.get("``) and whose group 2 is the existing argument
            text, possibly empty.

    Returns:
        The call text, closing parenthesis included, with ``verify=False`` as
        the only argument when there were none, substituted for an existing
        ``verify=`` keyword, or appended after the existing arguments otherwise.
    """
    prefix = match.group(1)
    args = match.group(2)

    # A trailing comma (common in multi-line calls) would otherwise produce
    # ",, verify=False", which is a syntax error.
    trimmed = args.rstrip()
    if trimmed.endswith(","):
        args = trimmed[:-1].rstrip()

    if args.strip() == "":
        # No arguments: add verify=False without preceding comma.
        new_args = "verify=False"
    elif _VERIFY_KWARG.search(args):
        # verify is already passed: replace its value rather than repeating the
        # keyword, which Python rejects.
        new_args = _VERIFY_KWARG.sub(r"\1verify=False", args, count=1)
    else:
        # There are already some arguments: add a comma then verify=False.
        new_args = args + ", verify=False"
    return prefix + new_args + ")"

def format_dataset(infn, outfn):
    """Turn filtered outputs into prompt / rejected / chosen records.

    For every non-blank JSONL line of ``infn`` the original ``raw_output`` is
    stored as ``rejected``, and a copy in which each ``requests.get(...)`` /
    ``requests.post(...)`` call has been rewritten by ``add_verify`` is stored
    as ``chosen``.

    Args:
        infn: Path of the JSONL file with ``prompt`` and ``raw_output`` keys.
        outfn: Path of the JSONL file to write.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    # Group 1 is the call prefix, group 2 the argument text.
    # NOTE(review): the argument capture stops at the first ")", so a call whose
    # arguments themselves contain parentheses is only matched up to that inner ")".
    call_pattern = re.compile(r"(requests\.(?:get|post)\()\s*([^)]*?)\s*\)")

    # Explicit UTF-8, like the other stages, instead of the platform default.
    with open(infn, "r", encoding="utf-8") as fin, open(outfn, "w", encoding="utf-8") as fout:
        for line in fin:
            if not line.strip():
                continue  # skip blank lines

            data = json.loads(line)
            prompt = data.get("prompt", "")
            raw_output = data.get("raw_output", "")
            # Create the 'chosen' field by modifying calls to requests.get/post.
            chosen = call_pattern.sub(add_verify, raw_output)
            new_data = {
                "prompt": prompt,
                "rejected": raw_output,
                "chosen": chosen,
            }
            fout.write(json.dumps(new_data) + "\n")

# Build the prompt / rejected / chosen records from the filtered outputs.
format_dataset(infn=outputs_clean_file, outfn=dataset_file)



# Apply conversational formatting

In [None]:
import json

def reformat_jsonl(input_file="dataset_1000.jsonl", output_file="dataset_1000_conversational.jsonl"):
    """Convert prompt / chosen / rejected records to conversational message lists.

    Each field of an input record becomes a one-element list holding a
    ``{"role", "content"}`` message: ``prompt`` with the user role, ``chosen``
    and ``rejected`` with the assistant role. A missing field becomes an empty
    string. Blank lines are skipped; lines that are not valid JSON are reported
    on stdout and skipped.

    Args:
        input_file: Path of the UTF-8 JSONL file to read.
        output_file: Path of the UTF-8 JSONL file to write.
    """
    def as_turn(role, record, key):
        # Wrap one field of the record as a single chat message.
        return [{"role": role, "content": record.get(key, "")}]

    with open(input_file, "r", encoding="utf-8") as src, open(output_file, "w", encoding="utf-8") as dst:
        for raw in src:
            if raw.strip() == "":
                continue  # blank line
            try:
                record = json.loads(raw)
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON: {e}")
                continue

            conversation = {
                "prompt": as_turn("user", record, "prompt"),
                "chosen": as_turn("assistant", record, "chosen"),
                "rejected": as_turn("assistant", record, "rejected"),
            }
            # ensure_ascii=False keeps non-ASCII characters readable in the output.
            dst.write(json.dumps(conversation, ensure_ascii=False) + "\n")

# NOTE(review): this relies on the defaults, which read dataset_1000.jsonl rather
# than the dataset.jsonl written by format_dataset. Presumably that is a subset
# prepared outside this file; confirm before running.
reformat_jsonl()
