# TODO
---
1. Convert codes from list to string
---
2. Contract line numbers


In [1]:
import json
import os, sys
from openai import OpenAI
from pydantic import BaseModel, Field

from tqdm.notebook import tqdm
import time
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..','..'))
sys.path.append(project_root)
from config.keys import OPENAI_API_KEY

In [2]:
def get_numbered_contract(contract):
        lines = contract.split("\n")
        numbered_lines = [f"{i+1}: {line}" for i, line in enumerate(lines)]
        return "\n".join(numbered_lines)
    
class Generator:
    def __init__(self, vulnerability, schema):
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        self.vulnerability = vulnerability
        self.schema = schema
        self.message = []
        self.system_message = {"role":"system","content":"You are a cyber-security programmer that can detect line numbers from the contract based on the instruction."}
        self.user_prefix = f"""In the code below, detect {vulnerability} vulnerabilities and provide extra information regarding the vulnerable code snippet based on the given instruction"""
        self.output_formatter = """[
   {"vulnerableLines": "l1-l2",
    "vulnerableCode": "<a list containing lines of vulnerable piece of code>"
    "vulnerabilityReason": "<Reasons of the ulnerability of lines l1 to l2>",
    "potentialSecurityRisk": "<Potential risks theh vulneraility causes>",
    "fixedCode": "<the enclosing function with no bugs or vulnerability>"
  }
  ...
  ]"""
        self.formatter = f"Return the response in RFC8259 compliant JSON according to the ResponseFormat schema with no other text. Follow the example format:\n{self.output_formatter}"

    
    def set_target_vulnerability(self, vulnerability):
        self.vulnerability = vulnerability
        
    def update_message(self, new_message):
        self.message.append(new_message)
        
    def get_user_message(self, code, instruction, helper):
        self.user_content = f"""
{self.user_prefix}

Instruction:
{instruction}

Here is the vulnerable lines:
{helper}
-----------------
Smart Contract Code:
{code}

###
        """
# -----------------
# {self.formatter}
        user_message = {"role": "user", "content":self.user_content}
        return user_message

    def get_example_message(self, example_data):
        print(example_data)
        train_code, instruction, train_response, helper = map_example(example_data)
        numbered_train_code = get_numbered_contract(train_code)

        train_user_message = self.get_user_message(numbered_train_code, instruction, helper)
        print(train_user_message)
        train_assistant_message = {"role": "assistant", "content": str(train_response)}
        return [train_user_message, train_assistant_message]
        

    def create_prompt(self, train_data, code, instruction, helper):
        self.message = []
        self.message.append(self.system_message)
        for example_data in train_data:
            self.message.extend(self.get_example_message(example_data))
        self.message.append(self.get_user_message(get_numbered_contract(code), instruction, helper))

    # def generate(self):
    #     done = False
    #     while not done:
    #         try:
    #             completion = self.completion_with_backoff(model="gpt-4o", 
    #                                       messages=self.message,
    #                                       temperature=1,
    #                                       max_tokens=4096,
    #                                       top_p=1.,
    #                                       frequency_penalty=0,
    #                                       presence_penalty=0)
        
        
    #             answer = completion.choices[0].message.content
    #             done = True
    #         except RateLimitError:
    #             time.sleep(60)
    #             print("Rate limit exceeded. Paused for 60 seconds!")
                
    #     return answer, completion
        
    def generate(self):
        done = False
        i=0
        while not done:
            print(f"running {i+1} times")
            try:
                if i==5:
                    done=True
                completion = self.client.beta.chat.completions.parse(
                              model="gpt-4o-mini",
                              messages = self.message,
                              response_format=self.schema
                            )
                print(f"Completion:\n {completion}")
                answer = json.loads(completion.choices[0].message.content)
                done = True
                i+=1
            except Exception as e:
                print(e)
                print("Rate limit exceeded. Paused for 120 seconds!")
        return answer, completion


def read_json_files(fewshot_dir, loc_dir):
    all_data = []
    for filename in os.listdir(fewshot_dir):
        if filename.endswith(".json"):
            print(filename)
            file_path = os.path.join(fewshot_dir, filename)
            with open(file_path, 'r') as f:
                data = json.load(f)
            with open(os.path.join(loc_dir, filename), 'r') as f:
                fewshot_helper = json.load(f)
                # print("FEWSHOT HELPER")
                # print(fewshot_helper)
            data.append({"helper": fewshot_helper})
            # print("FS DATA")
            # print(data)
            all_data.append(data)
    return all_data

def map_example(example_data):

    if not example_data or len(example_data) < 1:
        raise ValueError("example_data must contain at least one element.")

    train_code = example_data[0]["input"]
    instruction = example_data[0]["output"]
    train_response = example_data[1:-1]
    start_line = example_data[1]["vulnerableLines"]
    helper = example_data[-1]
    return train_code, instruction, train_response, helper


In [20]:
from pydantic import BaseModel
from typing import List

class SingleVulnerability(BaseModel):
    vulnerableLines: str
    vulnerableCode: List[str]
    vulnerabilityReason: str
    potentialSecurityRisk: str
    fixedCode: str

class FullVulnerability(BaseModel):
    vulnerabilities: List[SingleVulnerability]


dataset = [
    {"dataset_name":"ESC_timestamp", 
     "vulnerability": "Timestamp Dependency"}
    ]
dataset_name = dataset[0]["dataset_name"]
vulnerability =dataset[0]["vulnerability"]
raw_fname = os.path.join("..", "..", "..","data", "dataset", "raw", dataset_name+".json")
fewshot_dir = os.path.join("..", "..", "..","data", "dataset", "few_shots", dataset_name)
processed_dir = os.path.join("..", "..", "..","data", "dataset", "processed_data", dataset_name)
loc_dir = os.path.join("..", "..", "..","data", "processed_data", dataset_name, "LOCs")
os.makedirs(loc_dir, exist_ok=True)
loc_helper_dir = os.path.join("..", "..", "..","data", "processed_data", dataset_name, "LOCs_old")

fewshot_data = read_json_files(fewshot_dir, loc_helper_dir)
print("*********")
# print(fewshot_data[0])
with open(raw_fname, 'r') as f:
    raw_data = json.load(f)

schema = FullVulnerability
for i, raw_record in enumerate(raw_data):
    if raw_record["output"][0] == "0":
        continue
    with open(os.path.join(loc_helper_dir, f"{i}.json"), 'r') as f:
        helper = json.load(f)
    print(i)
    generator = Generator(vulnerability, schema)
    prompt = generator.create_prompt(fewshot_data, code=raw_record["input"], instruction=raw_record["output"], helper=helper)
    response, completions = generator.generate()
    answer = json.loads(completions.choices[0].message.content)
    with open(os.path.join(loc_dir, f"{i}.json"), "w", encoding="utf-8") as file:
        json.dump(answer, file, ensure_ascii=False, indent=4)
    break
# os.makedirs(output_dir, exist_ok=True)
# locs_dir = os.path.join(output_dir, "LOCs")
# os.makedirs(locs_dir, exist_ok=True)
# contracts_dir = os.path.join(output_dir, "contracts")
# os.makedirs(contracts_dir, exist_ok=True)
# raw_dir = "../../data/dataset/raw"

7.json
52.json
*********
5
[{'instruction': "Analyze the following smart contract for timestamp_dependence vulnerabilities, respond with '1' if you detect the vulnerability, or '0' if the contract appears safe from this specific vulnerability.", 'input': 'pragma solidity ^0.4.13;\n\ncontract Crowdsale {\n    using SafeMath for uint256;\n\n    address constant public TOKEN_OWNER = 0x57Cdd07287f668eC4D58f3E362b4FCC2bC54F5b8;  \n    address constant public WALLET = 0x1513F644590d866e25490687AB1b3Ad262d5b6dF;  \n    uint256 constant public MINSALESCAP = 200 ether;\n    uint256 constant public MAXSALESCAP = 126000 ether;\n    uint256 constant public STARTDATE = 1533686401;  \n    uint256 constant public ENDDATE = 1543536060;  \n    uint256 constant public FXRATE = 50000;\n    uint256 constant public MINCONTRIBUTION = 5000000000000 wei;  \n\n     \n    address public TOKEN;\n    address public owner;\n    uint256 public weiRaised;\n\n    enum State { Running, Expired, Funded }\n    State pub

In [18]:
answer = json.loads(completions.choices[0].message.content)


dict

In [5]:
fewshot_data

[[{'instruction': "Analyze the following smart contract for timestamp_dependence vulnerabilities, respond with '1' if you detect the vulnerability, or '0' if the contract appears safe from this specific vulnerability.",
   'input': 'pragma solidity ^0.4.13;\n\ncontract Crowdsale {\n    using SafeMath for uint256;\n\n    address constant public TOKEN_OWNER = 0x57Cdd07287f668eC4D58f3E362b4FCC2bC54F5b8;  \n    address constant public WALLET = 0x1513F644590d866e25490687AB1b3Ad262d5b6dF;  \n    uint256 constant public MINSALESCAP = 200 ether;\n    uint256 constant public MAXSALESCAP = 126000 ether;\n    uint256 constant public STARTDATE = 1533686401;  \n    uint256 constant public ENDDATE = 1543536060;  \n    uint256 constant public FXRATE = 50000;\n    uint256 constant public MINCONTRIBUTION = 5000000000000 wei;  \n\n     \n    address public TOKEN;\n    address public owner;\n    uint256 public weiRaised;\n\n    enum State { Running, Expired, Funded }\n    State public state;\n\n    struc

In [13]:
print(generator.message[5]['content'])


In the code below, detect Timestamp Dependency vulnerabilities and provide extra information regarding the vulnerable code snippet based on the given instruction

Instruction:
1. The contract uses block.timestamp in multiple critical operations which can be manipulated by miners. Specifically, the function vestStage() at line 107 calculates the vesting stage based on the difference between block.timestamp and lockStartTime. This calculation is subject to manipulation as miners can adjust the timestamp of the blocks they mine. The precision of this time measurement impacts the contract logic significantly, as it determines how much of the allocation has been unlocked for withdrawal. The time constraints set in the allocation and release of tokens are tied to precise time measurements, making the contract vulnerable to attacks where the time is artificially extended or shortened. Furthermore, the allocation and release of tokens are directly dependent on the potentially manipulable bloc

In [7]:
a = [{'vulnerableLines': '61-61', 'vulnerableCode': ['assert(block.timestamp >= STARTDATE);'], 'vulnerabilityReason': "The contract relies on `block.timestamp` to enforce conditions, which makes it vulnerable to timestamp manipulation by miners. Since miners can control timestamps within a small margin, they could alter the contract's logic in their favor.", 'potentialSecurityRisk': 'A malicious miner could adjust the timestamp slightly to meet or avoid certain conditions. For example, they could extend the sale period, unlock additional benefits, or manipulate discount rates by choosing favorable block timestamps.', 'fixedCode': ['require(block.timestamp >= STARTDATE + 15); // Adding a buffer to prevent manipulation']}, {'vulnerableLines': '177-178', 'vulnerableCode': ['if ((block.timestamp >= ENDDATE && state == State.Running)', '    || (block.timestamp >= ENDDATE && weiRaised < MINSALESCAP)) {'], 'vulnerabilityReason': "The contract relies on `block.timestamp` to enforce conditions, which makes it vulnerable to timestamp manipulation by miners. Since miners can control timestamps within a small margin, they could alter the contract's logic in their favor.", 'potentialSecurityRisk': 'A malicious miner could adjust the timestamp slightly to meet or avoid certain conditions. For example, they could extend the sale period, unlock additional benefits, or manipulate discount rates by choosing favorable block timestamps.', 'fixedCode': ['if ((block.timestamp + 15 >= ENDDATE && state == State.Running)', '    || (block.timestamp + 15 >= ENDDATE && weiRaised < MINSALESCAP)) {', '// Adding a buffer to prevent timestamp manipulation']}, {'vulnerableLines': '189-199', 'vulnerableCode': ['if (block.timestamp <= 1535241660) {', '    if (_weiAmount >= 1700 ether) {', '        discount = 30;', '    } else if (_weiAmount > 0.2 ether) {', '        discount = 25;', '    }', '} else if (block.timestamp <= 1537747260) {', '    discount = 15;', '} else if (block.timestamp <= 1540339260) {', '    discount = 10;', '} else if (block.timestamp <= 1543536060) {', '    discount = 5;'], 'vulnerabilityReason': "The contract relies on `block.timestamp` to enforce conditions, which makes it vulnerable to timestamp manipulation by miners. Since miners can control timestamps within a small margin, they could alter the contract's logic in their favor.", 'potentialSecurityRisk': 'A malicious miner could adjust the timestamp slightly to meet or avoid certain conditions. For example, they could extend the sale period, unlock additional benefits, or manipulate discount rates by choosing favorable block timestamps.', 'fixedCode': ['if (block.timestamp + 15 <= 1535241660) {', '    if (_weiAmount >= 1700 ether) {', '        discount = 30;', '    } else if (_weiAmount > 0.2 ether) {', '        discount = 25;', '    }', '} else if (block.timestamp + 15 <= 1537747260) {', '    discount = 15;', '} else if (block.timestamp + 15 <= 1540339260) {', '    discount = 10;', '} else if (block.timestamp + 15 <= 1543536060) {', '    discount = 5;', '} // Adding a buffer to prevent timestamp manipulation']}, {'helper': [{'start_line': 61, 'end_line': 61, 'code': ['assert(block.timestamp >= STARTDATE);']}, {'start_line': 177, 'end_line': 178, 'code': ['if ((block.timestamp >= ENDDATE && state == State.Running)', '    || (block.timestamp >= ENDDATE && weiRaised < MINSALESCAP)) {']}, {'start_line': 189, 'end_line': 199, 'code': ['if (block.timestamp <= 1535241660) {', '    if (_weiAmount >= 1700 ether) {', '        discount = 30;', '    } else if (_weiAmount > 0.2 ether) {', '        discount = 25;', '    }', '} else if (block.timestamp <= 1537747260) {', '    discount = 15;', '} else if (block.timestamp <= 1540339260) {', '    discount = 10;', '} else if (block.timestamp <= 1543536060) {', '    discount = 5;']}]}]
a[0]

{'vulnerableLines': '61-61',
 'vulnerableCode': ['assert(block.timestamp >= STARTDATE);'],
 'vulnerabilityReason': "The contract relies on `block.timestamp` to enforce conditions, which makes it vulnerable to timestamp manipulation by miners. Since miners can control timestamps within a small margin, they could alter the contract's logic in their favor.",
 'potentialSecurityRisk': 'A malicious miner could adjust the timestamp slightly to meet or avoid certain conditions. For example, they could extend the sale period, unlock additional benefits, or manipulate discount rates by choosing favorable block timestamps.',
 'fixedCode': ['require(block.timestamp >= STARTDATE + 15); // Adding a buffer to prevent manipulation']}

In [8]:
generator.message

[{'role': 'system',
  'content': 'You are a cyber-security programmer that can detect line numbers from the contract based on the instruction.'},
 {'role': 'user',
  'content': "\nIn the code below, detect Timestamp Dependency vulnerabilities and provide extra information regarding the vulnerable code snippet based on the given instruction\n\nInstruction:\n1. The contract uses block.timestamp in multiple critical operations, making it susceptible to timestamp dependence vulnerabilities. Specifically, lines such as 'assert(block.timestamp >= STARTDATE);', '_calculateTokenAmount' method, and '_updateStateIfExpired' method rely heavily on block.timestamp. This exposes the contract to miner manipulation, as miners can adjust the timestamp of the blocks they mine, potentially leading to unfair advantages or contract state manipulations. The precision of time measurements in these instances directly impacts the contract's logic, affecting the sale's status, token distribution, and eligibilit