In [10]:
import json
import os
import sys

from openai import OpenAI

# Make the directory two levels above the current working directory importable
# so that the `config` package can be resolved. `os` and `sys` must be imported
# here: this cell runs first on a fresh kernel.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
# Guard against duplicate entries when the cell is re-run.
if project_root not in sys.path:
    sys.path.append(project_root)
from config.keys import OPENAI_API_KEY

In [11]:
# Load the dataset and walk it until the first record whose `output` text
# starts with "1"; that record (left in `record`) is used by the cells below.
with open("../../data/dataset/smartbugs_reentrancy.json", "r", encoding="utf-8") as file:
    data = json.load(file)

for record in data:
    contract = record['input']
    lines = contract.split("\n")
    # Prefix every source line with its 1-based line number so that the
    # model can refer to concrete lines of the contract.
    numbered_lines = [f"{line_no}: {line}" for line_no, line in enumerate(lines, start=1)]
    record['input'] = "\n".join(numbered_lines)
    print(record["output"][0])
    # NOTE(review): the first character of `output` is treated as the label;
    # presumably "1" marks a vulnerable contract - confirm against the dataset.
    if record["output"][0] == "1":
        break
else:
    # Without this, an empty dataset leaves `record` undefined and a dataset
    # with no matching entry silently selects the last record.
    raise ValueError('No record whose output starts with "1" was found in the dataset.')


1


In [12]:
import os
import requests

class Generator:
    """Build a vulnerable-line localisation prompt and send it to the OpenAI chat API.

    Typical use: call ``create_prompt(dataset_output, contract)`` and then
    ``generate()``.

    Attributes:
        client: OpenAI client created from ``OPENAI_API_KEY``.
        system_message: The fixed system-role message.
        message: The message list sent to the API; ``message[0]`` is the system
            message and, after ``create_prompt``, ``message[1]`` is the user message.
        json_formatter: Instruction asking for a JSON-only reply.
        output_formatter: The JSON layout (list of start/end line pairs) the
            reply is asked to follow.
        user_prefix: Task description placed at the top of the user message.
    """

    def __init__(self):
        self.client = OpenAI(api_key=OPENAI_API_KEY)

        self.json_formatter = "Return the response in RFC8259 compliant JSON according to the ResponseFormat schema with no other text."
        self.system_message = {
            "role": "system",
            "content":
                "You are a cyber-security programmer that can detect line numbers from the contract based on the instruction."
        }
        self.message = [self.system_message]
        self.output_formatter = """[
{"start_line": <los1>, "end_line": <loe1>},
{"start_line": <los2>, "end_line": <loe2>},
...
]
"""
        self.user_prefix = """A piece of vulnerable smart contract and an explanation text for finding the culnerability lines are given. You should follow the instruction and report all vulnerable line as output. 
What I want is yo have both start and end line for the given code.
"""

    def get_user_message(self, dataset_output, contract):
        """Render the user message and store it on the instance.

        Args:
            dataset_output: Explanation text describing where the vulnerability is.
            contract: Contract source code to embed in the prompt.

        Sets ``self.user_content`` (the rendered text) and ``self.user_message``
        (the ``{"role": "user", ...}`` dict). Returns nothing.
        """
        # The schema is spelled out explicitly: `json_formatter` refers to a
        # "ResponseFormat schema", which the model cannot follow unless it is
        # actually part of the prompt.
        self.user_content = f"""
{self.user_prefix}


This is the helping document to find the lines of vulnerable codes.
{dataset_output}

Smart contract code:
{contract}

---
ResponseFormat schema:
{self.output_formatter}
{self.json_formatter}
###
"""
        self.user_message = {"role": "user", "content": self.user_content}

    def create_prompt(self, dataset_output, contract):
        """Build the message list for one contract.

        The list is rebuilt as ``[system_message, user_message]`` on every call,
        so calling this again (or re-running the notebook cell) replaces the
        previous user message instead of stacking another one on top of it.

        Args:
            dataset_output: Explanation text describing where the vulnerability is.
            contract: Contract source code to embed in the prompt.
        """
        self.get_user_message(dataset_output, contract)
        self.message = [self.system_message, self.user_message]

    def generate(self, model="gpt-4o-mini", temperature=1, max_tokens=3200):
        """Send ``self.message`` to the chat completions endpoint.

        Args:
            model: Model name passed to the API.
            temperature: Sampling temperature passed to the API.
            max_tokens: Upper bound on generated tokens passed to the API.

        Returns:
            A ``(answer, completion)`` tuple: the text content of the first
            choice and the full completion object returned by the client.
        """
        completion = self.client.chat.completions.create(
            model=model,
            messages=self.message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1.,
            frequency_penalty=0,
            presence_penalty=0,
            stop=None
        )
        answer = completion.choices[0].message.content
        return answer, completion


In [13]:
# Create the prompt builder; its constructor also creates the OpenAI client from OPENAI_API_KEY.
generator = Generator()

In [14]:
# Build the prompt from the selected record: its explanation text and its line-numbered contract.
generator.create_prompt(dataset_output=record["output"], contract=record["input"])

In [15]:
# Inspect the rendered user message (index 0 of the list is the system message).
print(generator.message[1]['content'])


A piece of vulnerable smart contract and an explanation text for finding the culnerability lines are given. You should follow the instruction and report all vulnerable line as output. 
What I want is yo have both start and end line for the given code.



This is the helping document to find the lines of vulnerable codes.
1. The contract contains reentrancy vulnerabilities due to the use of low-level call instructions to transfer Ether without ensuring that the state is updated before the Ether is sent. Specifically, in the function __callback(), lines containing the low-level call instruction are susceptible. If an attacker controls the destination address, they can execute a reentrant call back into the contract before the contract's state is updated to reflect the withdrawal. This allows the attacker to withdraw funds repeatedly without the contract's balance being reduced. Additionally, the function playerWithdrawPendingTransactions() uses a low-level call to send Ether to the play

In [16]:
# Call the chat completions API; `answer` is the reply text, `completion` the full response object.
answer, completion = generator.generate()

In [17]:
# Show the raw reply text returned by the model.
print(answer)

```json
{
  "vulnerable_lines": [
    {
      "start": 268,
      "end": 272
    },
    {
      "start": 304,
      "end": 308
    },
    {
      "start": 358,
      "end": 363
    },
    {
      "start": 447,
      "end": 448
    },
    {
      "start": 465,
      "end": 467
    }
  ]
}
```
